Loop Id: 580 | Module: exec | Source: BsplineFunctor.h:305-336 | Coverage: 0.01% |
---|
Loop Id: 580 | Module: exec | Source: BsplineFunctor.h:305-336 | Coverage: 0.01% |
---|
0x445978 VMOVAPD (%R12,%R8,2),%YMM11 [10] |
0x44597e VMOVAPD 0x20(%R12,%R8,2),%YMM10 [10] |
0x445985 KMOVB %K1,%K2 |
0x445989 KMOVB %K1,%K5 |
0x44598d KMOVB %K1,%K4 |
0x445991 KMOVB %K1,%K6 |
0x445995 VMOVAPD 0x500(%RSP),%YMM9 [18] |
0x44599e KMOVB %K1,%K3 |
0x4459a2 VMULPD %YMM18,%YMM11,%YMM15 |
0x4459a8 KMOVB %K1,%K7 |
0x4459ac VMOVAPD 0x460(%RSP),%YMM24 [18] |
0x4459b4 VMOVDQA (%R15,%R8,1),%YMM4 [1] |
0x4459ba VMULPD %YMM18,%YMM10,%YMM0 |
0x4459c0 VMOVAPD %YMM9,%YMM14 |
0x4459c5 ADD $0x20,%R8 |
0x4459c9 VRNDSCALEPD $0xb,%YMM15,%YMM7 |
0x4459d0 VCVTTPD2DQ %YMM15,%XMM5 |
0x4459d5 VGATHERDPD (%RAX,%XMM5,8),%YMM22{%K2} [2] |
0x4459dc KMOVB %K1,%K2 |
0x4459e0 VRNDSCALEPD $0xb,%YMM0,%YMM13 |
0x4459e7 VCVTTPD2DQ %YMM0,%XMM2 |
0x4459eb VINSERTI128 $0x1,%XMM2,%YMM5,%YMM6 |
0x4459f1 VPADDD 0x8a0e7(%RIP),%YMM6,%YMM8 [14] |
0x4459f9 VPERM2I128 $0x11,%YMM6,%YMM6,%YMM12 |
0x4459ff VGATHERDPD (%RAX,%XMM12,8),%YMM21{%K3} [8] |
0x445a06 KMOVB %K1,%K3 |
0x445a0a VGATHERDPD (%RAX,%XMM8,8),%YMM23{%K4} [6] |
0x445a11 KMOVB %K1,%K4 |
0x445a15 VSUBPD %YMM7,%YMM15,%YMM1 |
0x445a19 VPADDD 0x8a0df(%RIP),%YMM6,%YMM15 [14] |
0x445a21 VPERM2I128 $0x11,%YMM8,%YMM8,%YMM7 |
0x445a27 VPADDD 0x8a0f1(%RIP),%YMM6,%YMM6 [14] |
0x445a2f VSUBPD %YMM13,%YMM0,%YMM0 |
0x445a34 VGATHERDPD (%RAX,%XMM7,8),%YMM5{%K5} [5] |
0x445a3b KMOVB %K1,%K5 |
0x445a3f VGATHERDPD (%RAX,%XMM15,8),%YMM8{%K6} [17] |
0x445a46 VPERM2I128 $0x11,%YMM15,%YMM15,%YMM13 |
0x445a4c VMOVAPD 0x520(%RSP),%YMM15 [18] |
0x445a55 KMOVB %K1,%K6 |
0x445a59 VGATHERDPD (%RAX,%XMM6,8),%YMM20{%K2} [3] |
0x445a60 VPERM2I128 $0x11,%YMM6,%YMM6,%YMM7 |
0x445a66 VMOVAPD 0x540(%RSP),%YMM6 [18] |
0x445a6f VGATHERDPD (%RAX,%XMM13,8),%YMM12{%K7} [9] |
0x445a76 VMULPD %YMM1,%YMM1,%YMM2 |
0x445a7a VGATHERDPD (%RAX,%XMM7,8),%YMM13{%K3} [4] |
0x445a81 KMOVB %K1,%K7 |
0x445a85 KMOVB %K1,%K2 |
0x445a89 VMULPD %YMM0,%YMM0,%YMM3 |
0x445a8d VMOVAPD %YMM6,%YMM7 |
0x445a91 KMOVB %K1,%K3 |
0x445a95 VFMADD132PD %YMM1,%YMM15,%YMM7 |
0x445a9a VFMADD132PD %YMM0,%YMM15,%YMM6 |
0x445a9f VMOVAPD 0x4e0(%RSP),%YMM15 [18] |
0x445aa8 VFMADD132PD %YMM1,%YMM15,%YMM14 |
0x445aad VFMADD132PD %YMM0,%YMM15,%YMM9 |
0x445ab2 VMOVAPD 0x4c0(%RSP),%YMM15 [18] |
0x445abb VMULPD %YMM2,%YMM1,%YMM17 |
0x445ac1 VMULPD %YMM3,%YMM0,%YMM16 |
0x445ac7 VMOVAPD %YMM15,%YMM19 |
0x445acd VMULPD %YMM9,%YMM5,%YMM9 |
0x445ad2 VMULPD %YMM14,%YMM23,%YMM14 |
0x445ad8 VFMADD132PD %YMM21,%YMM9,%YMM6 |
0x445ade VMOVAPD 0x480(%RSP),%YMM9 [18] |
0x445ae7 VFMADD132PD %YMM22,%YMM14,%YMM7 |
0x445aed VMOVAPD 0x4a0(%RSP),%YMM14 [18] |
0x445af6 VFMADD132PD %YMM1,%YMM14,%YMM19 |
0x445afc VFMADD132PD %YMM0,%YMM14,%YMM15 |
0x445b01 VMOVAPD %YMM9,%YMM14 |
0x445b06 VFMADD132PD %YMM1,%YMM24,%YMM14 |
0x445b0c VFMADD132PD %YMM0,%YMM24,%YMM9 |
0x445b12 VMULPD %YMM14,%YMM20,%YMM14 |
0x445b18 VMULPD %YMM9,%YMM13,%YMM9 |
0x445b1d VFMADD231PD %YMM19,%YMM8,%YMM14 |
0x445b23 VMOVDQA32 %XMM4,%XMM19 |
0x445b29 VFMADD231PD %YMM15,%YMM12,%YMM9 |
0x445b2e VMOVAPD 0x440(%RSP),%YMM15 [18] |
0x445b37 VADDPD %YMM14,%YMM7,%YMM7 |
0x445b3c VADDPD %YMM9,%YMM6,%YMM6 |
0x445b41 VMULPD %YMM15,%YMM7,%YMM14 |
0x445b46 VBROADCASTSD 0x89a21(%RIP),%YMM7 [14] |
0x445b4f VMULPD %YMM15,%YMM6,%YMM9 |
0x445b54 VDIVPD %YMM11,%YMM7,%YMM11 |
0x445b59 VSCATTERDPD %YMM14,(%R9,%XMM4,8){%K4} [7] |
0x445b60 VPERM2I128 $0x11,%YMM4,%YMM4,%YMM4 |
0x445b66 VSCATTERDPD %YMM9,(%R9,%XMM4,8){%K5} [16] |
0x445b6d VMOVAPD 0x400(%RSP),%YMM6 [18] |
0x445b76 VMOVAPD 0x3e0(%RSP),%YMM15 [18] |
0x445b7f VMOVAPD 0x420(%RSP),%YMM14 [18] |
0x445b88 VMOVAPD 0x3a0(%RSP),%YMM9 [18] |
0x445b91 VDIVPD %YMM10,%YMM7,%YMM10 |
0x445b96 VMOVAPD %YMM6,%YMM7 |
0x445b9a VFMADD132PD %YMM1,%YMM15,%YMM7 |
0x445b9f VFMADD132PD %YMM0,%YMM15,%YMM6 |
0x445ba4 VMOVAPD 0x380(%RSP),%YMM15 [18] |
0x445bad VFMADD231PD %YMM14,%YMM2,%YMM7 |
0x445bb2 VFMADD231PD %YMM14,%YMM3,%YMM6 |
0x445bb7 VMOVAPD %YMM9,%YMM14 |
0x445bbc VFMADD132PD %YMM1,%YMM15,%YMM14 |
0x445bc1 VFMADD132PD %YMM0,%YMM15,%YMM9 |
0x445bc6 VMOVAPD 0x3c0(%RSP),%YMM15 [18] |
0x445bcf VMULPD %YMM18,%YMM11,%YMM11 |
0x445bd5 VFMADD231PD %YMM15,%YMM2,%YMM14 |
0x445bda VFMADD231PD %YMM15,%YMM3,%YMM9 |
0x445bdf VMULPD %YMM14,%YMM23,%YMM14 |
0x445be5 VMULPD %YMM9,%YMM5,%YMM9 |
0x445bea VFMADD132PD %YMM22,%YMM14,%YMM7 |
0x445bf0 VMOVAPD 0x340(%RSP),%YMM14 [18] |
0x445bf9 VMOVAPD 0x320(%RSP),%YMM15 [18] |
0x445c02 VFMADD132PD %YMM21,%YMM9,%YMM6 |
0x445c08 VMOVAPD 0x360(%RSP),%YMM9 [18] |
0x445c11 VMOVAPD %YMM14,%YMM24 |
0x445c17 VFMADD132PD %YMM0,%YMM15,%YMM14 |
0x445c1c VFMADD132PD %YMM1,%YMM15,%YMM24 |
0x445c22 VMOVAPD 0x300(%RSP),%YMM15 [18] |
0x445c2b VMULPD %YMM18,%YMM10,%YMM10 |
0x445c31 VFMADD231PD %YMM9,%YMM3,%YMM14 |
0x445c36 VFMADD231PD %YMM9,%YMM2,%YMM24 |
0x445c3c VMOVAPD %YMM15,%YMM9 |
0x445c41 VFMADD213PD 0x580(%RSP),%YMM1,%YMM9 [18] |
0x445c4b VFMADD213PD 0x580(%RSP),%YMM0,%YMM15 [18] |
0x445c55 VFMADD231PD 0x5a0(%RSP),%YMM2,%YMM9 [18] |
0x445c5f VFMADD231PD 0x5a0(%RSP),%YMM3,%YMM15 [18] |
0x445c69 VMULPD %YMM9,%YMM20,%YMM9 |
0x445c6f VMULPD %YMM15,%YMM13,%YMM15 |
0x445c74 VFMADD231PD %YMM24,%YMM8,%YMM9 |
0x445c7a VFMADD132PD %YMM12,%YMM15,%YMM14 |
0x445c7f VADDPD %YMM9,%YMM7,%YMM7 |
0x445c84 VADDPD %YMM14,%YMM6,%YMM6 |
0x445c89 VMULPD %YMM11,%YMM7,%YMM11 |
0x445c8e VMULPD %YMM10,%YMM6,%YMM10 |
0x445c93 VSCATTERDPD %YMM11,(%R11,%XMM19,8){%K6} [12] |
0x445c9a VSCATTERDPD %YMM10,(%R11,%XMM4,8){%K7} [15] |
0x445ca1 VMOVAPD 0x2c0(%RSP),%YMM14 [18] |
0x445caa VMOVAPD 0x2a0(%RSP),%YMM6 [18] |
0x445cb3 VMOVAPD 0x2e0(%RSP),%YMM9 [18] |
0x445cbc VMOVAPD 0x280(%RSP),%YMM11 [18] |
0x445cc5 VMULPD %YMM14,%YMM2,%YMM15 |
0x445cca VMOVAPD %YMM6,%YMM10 |
0x445cce VMULPD %YMM14,%YMM3,%YMM7 |
0x445cd3 VFMADD132PD %YMM1,%YMM11,%YMM10 |
0x445cd8 VMOVAPD 0x240(%RSP),%YMM14 [18] |
0x445ce1 VFMADD132PD %YMM0,%YMM11,%YMM6 |
0x445ce6 VMULPD %YMM14,%YMM2,%YMM11 |
0x445ceb VMULPD %YMM14,%YMM3,%YMM14 |
0x445cf0 VFMADD231PD %YMM9,%YMM17,%YMM15 |
0x445cf6 VFMADD231PD %YMM9,%YMM16,%YMM7 |
0x445cfc VMOVAPD 0x220(%RSP),%YMM9 [18] |
0x445d05 VADDPD %YMM15,%YMM10,%YMM10 |
0x445d0a VMOVAPD 0x260(%RSP),%YMM15 [18] |
0x445d13 VADDPD %YMM6,%YMM7,%YMM7 |
0x445d17 VMOVAPD %YMM9,%YMM6 |
0x445d1b VFMADD231PD %YMM15,%YMM17,%YMM11 |
0x445d21 VFMADD231PD %YMM15,%YMM16,%YMM14 |
0x445d27 VMOVAPD 0x200(%RSP),%YMM15 [18] |
0x445d30 VFMADD132PD %YMM1,%YMM15,%YMM6 |
0x445d35 VFMADD132PD %YMM0,%YMM15,%YMM9 |
0x445d3a VMOVAPD %YMM1,%YMM15 |
0x445d3e VFMADD132PD %YMM26,%YMM25,%YMM1 |
0x445d44 VFMADD132PD %YMM29,%YMM28,%YMM15 |
0x445d4a VADDPD %YMM14,%YMM9,%YMM14 |
0x445d4f VADDPD %YMM11,%YMM6,%YMM11 |
0x445d54 VMOVAPD %YMM0,%YMM9 |
0x445d58 VFMADD132PD %YMM26,%YMM25,%YMM0 |
0x445d5e VFMADD132PD %YMM29,%YMM28,%YMM9 |
0x445d64 VMULPD %YMM23,%YMM11,%YMM6 |
0x445d6a VMOVAPD 0x1e0(%RSP),%YMM11 [18] |
0x445d73 VMULPD %YMM14,%YMM5,%YMM5 |
0x445d78 VFMADD231PD %YMM10,%YMM22,%YMM6 |
0x445d7e VMULPD %YMM30,%YMM3,%YMM10 |
0x445d84 VFMADD231PD %YMM7,%YMM21,%YMM5 |
0x445d8a VMULPD %YMM27,%YMM3,%YMM3 |
0x445d90 VMULPD %YMM30,%YMM2,%YMM7 |
0x445d96 VMULPD %YMM27,%YMM2,%YMM2 |
0x445d9c VFMADD231PD %YMM31,%YMM16,%YMM10 |
0x445da2 VFMADD231PD %YMM11,%YMM16,%YMM3 |
0x445da8 VFMADD231PD %YMM31,%YMM17,%YMM7 |
0x445dae VFMADD231PD %YMM11,%YMM17,%YMM2 |
0x445db4 VADDPD %YMM3,%YMM0,%YMM0 |
0x445db8 VADDPD %YMM15,%YMM7,%YMM14 |
0x445dbd VADDPD %YMM9,%YMM10,%YMM7 |
0x445dc2 VADDPD %YMM1,%YMM2,%YMM1 |
0x445dc6 VMULPD %YMM0,%YMM13,%YMM13 |
0x445dca VMULPD %YMM20,%YMM1,%YMM10 |
0x445dd0 VFMADD132PD %YMM7,%YMM13,%YMM12 |
0x445dd5 VFMADD132PD %YMM14,%YMM10,%YMM8 |
0x445dda VADDPD %YMM12,%YMM5,%YMM12 |
0x445ddf VADDPD %YMM6,%YMM8,%YMM8 |
0x445de3 VSCATTERDPD %YMM8,(%RSI,%XMM19,8){%K2} [11] |
0x445dea VSCATTERDPD %YMM12,(%RSI,%XMM4,8){%K3} [13] |
0x445df1 CMP %R8,%R10 |
0x445df4 JNE 445978 |
/home/kcamus/qaas_runs/169-451-1869/intel/miniqmc/build/miniqmc/src/QMCWaveFunctions/Jastrow/BsplineFunctor.h: 305 - 336 |
-------------------------------------------------------------------------------- |
305: real_type r = distArrayCompressed[j]; |
306: int iScatter = distIndices[j]; |
307: real_type rinv = cOne / r; |
308: r *= DeltaRInv; |
309: int iGather = (int)r; |
310: real_type t = r - real_type(iGather); |
311: real_type tp0 = t * t * t; |
312: real_type tp1 = t * t; |
313: real_type tp2 = t; |
314: |
315: real_type sCoef0 = SplineCoefs[iGather + 0]; |
316: real_type sCoef1 = SplineCoefs[iGather + 1]; |
317: real_type sCoef2 = SplineCoefs[iGather + 2]; |
318: real_type sCoef3 = SplineCoefs[iGather + 3]; |
319: |
320: // clang-format off |
321: laplArray[iScatter] = dSquareDeltaRinv * |
322: (sCoef0*( d2A[ 2]*tp2 + d2A[ 3])+ |
323: sCoef1*( d2A[ 6]*tp2 + d2A[ 7])+ |
324: sCoef2*( d2A[10]*tp2 + d2A[11])+ |
325: sCoef3*( d2A[14]*tp2 + d2A[15])); |
326: |
327: gradArray[iScatter] = DeltaRInv * rinv * |
328: (sCoef0*( dA[ 1]*tp1 + dA[ 2]*tp2 + dA[ 3])+ |
329: sCoef1*( dA[ 5]*tp1 + dA[ 6]*tp2 + dA[ 7])+ |
330: sCoef2*( dA[ 9]*tp1 + dA[10]*tp2 + dA[11])+ |
331: sCoef3*( dA[13]*tp1 + dA[14]*tp2 + dA[15])); |
332: |
333: valArray[iScatter] = (sCoef0*(A[ 0]*tp0 + A[ 1]*tp1 + A[ 2]*tp2 + A[ 3])+ |
334: sCoef1*(A[ 4]*tp0 + A[ 5]*tp1 + A[ 6]*tp2 + A[ 7])+ |
335: sCoef2*(A[ 8]*tp0 + A[ 9]*tp1 + A[10]*tp2 + A[11])+ |
336: sCoef3*(A[12]*tp0 + A[13]*tp1 + A[14]*tp2 + A[15])); |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►100.00+ | miniqmcreference::TwoBodyJastr[...] | TwoBodyJastrowRef.h:421 | exec |
○ | qmcplusplus::WaveFunction::eva[...] | WaveFunction.cpp:181 | exec |
○ | main._omp_fn.0 | miniqmc.cpp:390 | exec |
○ | GOMP_parallel | libgomp.h:985 | libgomp.so.1.0.0 |
Path / |
Metric | Value |
---|---|
CQA speedup if no scalar integer | 1.04 |
CQA speedup if FP arith vectorized | 1.52 |
CQA speedup if fully vectorized | 2.32 |
CQA speedup if no inter-iteration dependency | NA |
CQA speedup if next bottleneck killed | 1.25 |
Bottlenecks | micro-operation queue |
Function | miniqmcreference::TwoBodyJastrowRef |
Source | BsplineFunctor.h:305-336 |
Source loop unroll info | unrolled by 8 |
Source loop unroll confidence level | high |
Unroll/vectorization loop type | main |
Unroll factor | 8 |
CQA cycles | 81.50 |
CQA cycles if no scalar integer | 78.00 |
CQA cycles if FP arith vectorized | 53.76 |
CQA cycles if fully vectorized | 35.15 |
Front-end cycles | 81.50 |
DIV/SQRT cycles | 16.00 |
P0 cycles | 65.00 |
P1 cycles | 65.00 |
P2 cycles | 47.50 |
P3 cycles | 47.50 |
P4 cycles | 24.00 |
P5 cycles | 25.00 |
P6 cycles | 2.00 |
P7 cycles | 0.00 |
Inter-iter dependencies cycles | 20 |
FE+BE cycles (UFS) | 82.02 - 82.25 |
Stall cycles (UFS) | 29.50 - 29.73 |
Nb insns | 191.00 |
Nb uops | 326.00 |
Nb loads | 47.00 |
Nb stores | 6.00 |
Nb stack references | 30.00 |
FLOP/cycle | 7.56 |
Nb FLOP add-sub | 64.00 |
Nb FLOP mul | 128.00 |
Nb FLOP fma | 208.00 |
Nb FLOP div | 8.00 |
Nb FLOP rcp | 0.00 |
Nb FLOP sqrt | 0.00 |
Nb FLOP rsqrt | 0.00 |
Bytes/cycle | 20.52 |
Bytes prefetched | 0.00 |
Bytes loaded | 1480.00 |
Bytes stored | 192.00 |
Stride 0 | 11.00 |
Stride 1 | 2.00 |
Stride n | 0.00 |
Stride unknown | 0.00 |
Stride indirect | 3.00 |
Vectorization ratio all | 99.43 |
Vectorization ratio load | 97.87 |
Vectorization ratio store | 100.00 |
Vectorization ratio mul | 100.00 |
Vectorization ratio add_sub | 100.00 |
Vectorization ratio fma | 100.00 |
Vectorization ratio div_sqrt | 100.00 |
Vectorization ratio other | 96.88 |
Vector-efficiency ratio all | 48.78 |
Vector-efficiency ratio load | 49.20 |
Vector-efficiency ratio store | 50.00 |
Vector-efficiency ratio mul | 50.00 |
Vector-efficiency ratio add_sub | 50.00 |
Vector-efficiency ratio fma | 50.00 |
Vector-efficiency ratio div_sqrt | 50.00 |
Vector-efficiency ratio other | 43.36 |
Metric | Value |
---|---|
CQA speedup if no scalar integer | 1.04 |
CQA speedup if FP arith vectorized | 1.52 |
CQA speedup if fully vectorized | 2.32 |
CQA speedup if no inter-iteration dependency | NA |
CQA speedup if next bottleneck killed | 1.25 |
Bottlenecks | micro-operation queue |
Function | miniqmcreference::TwoBodyJastrowRef |
Source | BsplineFunctor.h:305-336 |
Source loop unroll info | unrolled by 8 |
Source loop unroll confidence level | high |
Unroll/vectorization loop type | main |
Unroll factor | 8 |
CQA cycles | 81.50 |
CQA cycles if no scalar integer | 78.00 |
CQA cycles if FP arith vectorized | 53.76 |
CQA cycles if fully vectorized | 35.15 |
Front-end cycles | 81.50 |
DIV/SQRT cycles | 16.00 |
P0 cycles | 65.00 |
P1 cycles | 65.00 |
P2 cycles | 47.50 |
P3 cycles | 47.50 |
P4 cycles | 24.00 |
P5 cycles | 25.00 |
P6 cycles | 2.00 |
P7 cycles | 0.00 |
Inter-iter dependencies cycles | 20 |
FE+BE cycles (UFS) | 82.02 - 82.25 |
Stall cycles (UFS) | 29.50 - 29.73 |
Nb insns | 191.00 |
Nb uops | 326.00 |
Nb loads | 47.00 |
Nb stores | 6.00 |
Nb stack references | 30.00 |
FLOP/cycle | 7.56 |
Nb FLOP add-sub | 64.00 |
Nb FLOP mul | 128.00 |
Nb FLOP fma | 208.00 |
Nb FLOP div | 8.00 |
Nb FLOP rcp | 0.00 |
Nb FLOP sqrt | 0.00 |
Nb FLOP rsqrt | 0.00 |
Bytes/cycle | 20.52 |
Bytes prefetched | 0.00 |
Bytes loaded | 1480.00 |
Bytes stored | 192.00 |
Stride 0 | 11.00 |
Stride 1 | 2.00 |
Stride n | 0.00 |
Stride unknown | 0.00 |
Stride indirect | 3.00 |
Vectorization ratio all | 99.43 |
Vectorization ratio load | 97.87 |
Vectorization ratio store | 100.00 |
Vectorization ratio mul | 100.00 |
Vectorization ratio add_sub | 100.00 |
Vectorization ratio fma | 100.00 |
Vectorization ratio div_sqrt | 100.00 |
Vectorization ratio other | 96.88 |
Vector-efficiency ratio all | 48.78 |
Vector-efficiency ratio load | 49.20 |
Vector-efficiency ratio store | 50.00 |
Vector-efficiency ratio mul | 50.00 |
Vector-efficiency ratio add_sub | 50.00 |
Vector-efficiency ratio fma | 50.00 |
Vector-efficiency ratio div_sqrt | 50.00 |
Vector-efficiency ratio other | 43.36 |
Path / |
Function | miniqmcreference::TwoBodyJastrowRef |
Source file and lines | BsplineFunctor.h:305-336 |
Module | exec |
nb instructions | 191 |
nb uops | 326 |
loop length | 1154 |
used x86 registers | 9 |
used mmx registers | 0 |
used xmm registers | 10 |
used ymm registers | 32 |
used zmm registers | 0 |
nb stack references | 30 |
ADD-SUB / MUL ratio | 0.50 |
micro-operation queue | 81.50 cycles |
front end | 81.50 cycles |
Port | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 |
---|---|---|---|---|---|---|---|---|
uops | 65.00 | 65.00 | 47.50 | 47.50 | 24.00 | 25.00 | 2.00 | 0.00 |
cycles | 65.00 | 65.00 | 47.50 | 47.50 | 24.00 | 25.00 | 2.00 | 0.00 |
Cycles executing div or sqrt instructions | 16.00 |
Longest recurrence chain latency (RecMII) | 20.00 |
FE+BE cycles | 82.02-82.25 |
Stall cycles | 29.50-29.73 |
RS full (events) | 4.82-4.52 |
PRF_FLOAT full (events) | 31.92-32.23 |
Front-end | 81.50 |
Dispatch | 65.00 |
DIV/SQRT | 16.00 |
Data deps. | 20.00 |
Overall L1 | 81.50 |
all | 100% |
load | 100% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 100% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 100% |
all | 99% |
load | 97% |
store | 100% |
mul | 100% |
add-sub | 100% |
fma | 100% |
div/sqrt | 100% |
other | 96% |
all | 99% |
load | 97% |
store | 100% |
mul | 100% |
add-sub | 100% |
fma | 100% |
div/sqrt | 100% |
other | 96% |
all | 34% |
load | 50% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 50% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 25% |
all | 49% |
load | 49% |
store | 50% |
mul | 50% |
add-sub | 50% |
fma | 50% |
div/sqrt | 50% |
other | 48% |
all | 48% |
load | 49% |
store | 50% |
mul | 50% |
add-sub | 50% |
fma | 50% |
div/sqrt | 50% |
other | 43% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|
VMOVAPD (%R12,%R8,2),%YMM11 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VMOVAPD 0x20(%R12,%R8,2),%YMM10 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
KMOVB %K1,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
KMOVB %K1,%K5 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
KMOVB %K1,%K4 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
KMOVB %K1,%K6 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VMOVAPD 0x500(%RSP),%YMM9 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
KMOVB %K1,%K3 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VMULPD %YMM18,%YMM11,%YMM15 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
KMOVB %K1,%K7 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VMOVAPD 0x460(%RSP),%YMM24 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VMOVDQA (%R15,%R8,1),%YMM4 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 0.50 |
VMULPD %YMM18,%YMM10,%YMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD %YMM9,%YMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
ADD $0x20,%R8 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VRNDSCALEPD $0xb,%YMM15,%YMM7 | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 8 | 1 |
VCVTTPD2DQ %YMM15,%XMM5 | 2 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0 | 0 | 7 | 1 |
VGATHERDPD (%RAX,%XMM5,8),%YMM22{%K2} | 4 | 1 | 0 | 2 | 2 | 0 | 1 | 0 | 0 | 20 | 4 |
KMOVB %K1,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VRNDSCALEPD $0xb,%YMM0,%YMM13 | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 8 | 1 |
VCVTTPD2DQ %YMM0,%XMM2 | 2 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0 | 0 | 7 | 1 |
VINSERTI128 $0x1,%XMM2,%YMM5,%YMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VPADDD 0x8a0e7(%RIP),%YMM6,%YMM8 | 1 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0.33 | 0 | 0 | 1 | 0.50 |
VPERM2I128 $0x11,%YMM6,%YMM6,%YMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VGATHERDPD (%RAX,%XMM12,8),%YMM21{%K3} | 4 | 1 | 0 | 2 | 2 | 0 | 1 | 0 | 0 | 20 | 4 |
KMOVB %K1,%K3 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VGATHERDPD (%RAX,%XMM8,8),%YMM23{%K4} | 4 | 1 | 0 | 2 | 2 | 0 | 1 | 0 | 0 | 20 | 4 |
KMOVB %K1,%K4 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VSUBPD %YMM7,%YMM15,%YMM1 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VPADDD 0x8a0df(%RIP),%YMM6,%YMM15 | 1 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0.33 | 0 | 0 | 1 | 0.50 |
VPERM2I128 $0x11,%YMM8,%YMM8,%YMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VPADDD 0x8a0f1(%RIP),%YMM6,%YMM6 | 1 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0.33 | 0 | 0 | 1 | 0.50 |
VSUBPD %YMM13,%YMM0,%YMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VGATHERDPD (%RAX,%XMM7,8),%YMM5{%K5} | 4 | 1 | 0 | 2 | 2 | 0 | 1 | 0 | 0 | 20 | 4 |
KMOVB %K1,%K5 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VGATHERDPD (%RAX,%XMM15,8),%YMM8{%K6} | 4 | 1 | 0 | 2 | 2 | 0 | 1 | 0 | 0 | 20 | 4 |
VPERM2I128 $0x11,%YMM15,%YMM15,%YMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VMOVAPD 0x520(%RSP),%YMM15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
KMOVB %K1,%K6 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VGATHERDPD (%RAX,%XMM6,8),%YMM20{%K2} | 4 | 1 | 0 | 2 | 2 | 0 | 1 | 0 | 0 | 20 | 4 |
VPERM2I128 $0x11,%YMM6,%YMM6,%YMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VMOVAPD 0x540(%RSP),%YMM6 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VGATHERDPD (%RAX,%XMM13,8),%YMM12{%K7} | 4 | 1 | 0 | 2 | 2 | 0 | 1 | 0 | 0 | 20 | 4 |
VMULPD %YMM1,%YMM1,%YMM2 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VGATHERDPD (%RAX,%XMM7,8),%YMM13{%K3} | 4 | 1 | 0 | 2 | 2 | 0 | 1 | 0 | 0 | 20 | 4 |
KMOVB %K1,%K7 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
KMOVB %K1,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VMULPD %YMM0,%YMM0,%YMM3 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD %YMM6,%YMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
KMOVB %K1,%K3 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VFMADD132PD %YMM1,%YMM15,%YMM7 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %YMM0,%YMM15,%YMM6 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD 0x4e0(%RSP),%YMM15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VFMADD132PD %YMM1,%YMM15,%YMM14 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %YMM0,%YMM15,%YMM9 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD 0x4c0(%RSP),%YMM15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VMULPD %YMM2,%YMM1,%YMM17 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM3,%YMM0,%YMM16 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD %YMM15,%YMM19 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMULPD %YMM9,%YMM5,%YMM9 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM14,%YMM23,%YMM14 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %YMM21,%YMM9,%YMM6 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD 0x480(%RSP),%YMM9 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VFMADD132PD %YMM22,%YMM14,%YMM7 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD 0x4a0(%RSP),%YMM14 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VFMADD132PD %YMM1,%YMM14,%YMM19 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %YMM0,%YMM14,%YMM15 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD %YMM9,%YMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VFMADD132PD %YMM1,%YMM24,%YMM14 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %YMM0,%YMM24,%YMM9 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM14,%YMM20,%YMM14 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM9,%YMM13,%YMM9 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM19,%YMM8,%YMM14 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVDQA32 %XMM4,%XMM19 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VFMADD231PD %YMM15,%YMM12,%YMM9 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD 0x440(%RSP),%YMM15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VADDPD %YMM14,%YMM7,%YMM7 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDPD %YMM9,%YMM6,%YMM6 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM15,%YMM7,%YMM14 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VBROADCASTSD 0x89a21(%RIP),%YMM7 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 0.50 |
VMULPD %YMM15,%YMM6,%YMM9 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VDIVPD %YMM11,%YMM7,%YMM11 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 13-14 | 8 |
VSCATTERDPD %YMM14,(%R9,%XMM4,8){%K4} | 19 | 0 | 0 | 2 | 2 | 4 | 1 | 0 | 0 | 13 | 9 |
VPERM2I128 $0x11,%YMM4,%YMM4,%YMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VSCATTERDPD %YMM9,(%R9,%XMM4,8){%K5} | 19 | 0 | 0 | 2 | 2 | 4 | 1 | 0 | 0 | 13 | 9 |
VMOVAPD 0x400(%RSP),%YMM6 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VMOVAPD 0x3e0(%RSP),%YMM15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VMOVAPD 0x420(%RSP),%YMM14 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VMOVAPD 0x3a0(%RSP),%YMM9 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VDIVPD %YMM10,%YMM7,%YMM10 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 13-14 | 8 |
VMOVAPD %YMM6,%YMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VFMADD132PD %YMM1,%YMM15,%YMM7 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %YMM0,%YMM15,%YMM6 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD 0x380(%RSP),%YMM15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VFMADD231PD %YMM14,%YMM2,%YMM7 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM14,%YMM3,%YMM6 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD %YMM9,%YMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VFMADD132PD %YMM1,%YMM15,%YMM14 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %YMM0,%YMM15,%YMM9 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD 0x3c0(%RSP),%YMM15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VMULPD %YMM18,%YMM11,%YMM11 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM15,%YMM2,%YMM14 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM15,%YMM3,%YMM9 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM14,%YMM23,%YMM14 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM9,%YMM5,%YMM9 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %YMM22,%YMM14,%YMM7 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD 0x340(%RSP),%YMM14 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VMOVAPD 0x320(%RSP),%YMM15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VFMADD132PD %YMM21,%YMM9,%YMM6 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD 0x360(%RSP),%YMM9 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VMOVAPD %YMM14,%YMM24 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VFMADD132PD %YMM0,%YMM15,%YMM14 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %YMM1,%YMM15,%YMM24 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD 0x300(%RSP),%YMM15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VMULPD %YMM18,%YMM10,%YMM10 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM9,%YMM3,%YMM14 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM9,%YMM2,%YMM24 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD %YMM15,%YMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VFMADD213PD 0x580(%RSP),%YMM1,%YMM9 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD213PD 0x580(%RSP),%YMM0,%YMM15 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD 0x5a0(%RSP),%YMM2,%YMM9 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD 0x5a0(%RSP),%YMM3,%YMM15 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM9,%YMM20,%YMM9 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM15,%YMM13,%YMM15 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM24,%YMM8,%YMM9 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %YMM12,%YMM15,%YMM14 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDPD %YMM9,%YMM7,%YMM7 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDPD %YMM14,%YMM6,%YMM6 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM11,%YMM7,%YMM11 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM10,%YMM6,%YMM10 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VSCATTERDPD %YMM11,(%R11,%XMM19,8){%K6} | 19 | 0 | 0 | 2 | 2 | 4 | 1 | 0 | 0 | 13 | 9 |
VSCATTERDPD %YMM10,(%R11,%XMM4,8){%K7} | 19 | 0 | 0 | 2 | 2 | 4 | 1 | 0 | 0 | 13 | 9 |
VMOVAPD 0x2c0(%RSP),%YMM14 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VMOVAPD 0x2a0(%RSP),%YMM6 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VMOVAPD 0x2e0(%RSP),%YMM9 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VMOVAPD 0x280(%RSP),%YMM11 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VMULPD %YMM14,%YMM2,%YMM15 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD %YMM6,%YMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMULPD %YMM14,%YMM3,%YMM7 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %YMM1,%YMM11,%YMM10 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD 0x240(%RSP),%YMM14 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VFMADD132PD %YMM0,%YMM11,%YMM6 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM14,%YMM2,%YMM11 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM14,%YMM3,%YMM14 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM9,%YMM17,%YMM15 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM9,%YMM16,%YMM7 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD 0x220(%RSP),%YMM9 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VADDPD %YMM15,%YMM10,%YMM10 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD 0x260(%RSP),%YMM15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VADDPD %YMM6,%YMM7,%YMM7 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD %YMM9,%YMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VFMADD231PD %YMM15,%YMM17,%YMM11 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM15,%YMM16,%YMM14 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD 0x200(%RSP),%YMM15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VFMADD132PD %YMM1,%YMM15,%YMM6 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %YMM0,%YMM15,%YMM9 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD %YMM1,%YMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VFMADD132PD %YMM26,%YMM25,%YMM1 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %YMM29,%YMM28,%YMM15 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDPD %YMM14,%YMM9,%YMM14 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDPD %YMM11,%YMM6,%YMM11 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD %YMM0,%YMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VFMADD132PD %YMM26,%YMM25,%YMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %YMM29,%YMM28,%YMM9 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM23,%YMM11,%YMM6 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD 0x1e0(%RSP),%YMM11 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VMULPD %YMM14,%YMM5,%YMM5 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM10,%YMM22,%YMM6 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM30,%YMM3,%YMM10 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM7,%YMM21,%YMM5 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM27,%YMM3,%YMM3 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM30,%YMM2,%YMM7 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM27,%YMM2,%YMM2 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM31,%YMM16,%YMM10 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM11,%YMM16,%YMM3 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM31,%YMM17,%YMM7 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM11,%YMM17,%YMM2 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDPD %YMM3,%YMM0,%YMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDPD %YMM15,%YMM7,%YMM14 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDPD %YMM9,%YMM10,%YMM7 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDPD %YMM1,%YMM2,%YMM1 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM0,%YMM13,%YMM13 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM20,%YMM1,%YMM10 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %YMM7,%YMM13,%YMM12 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %YMM14,%YMM10,%YMM8 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDPD %YMM12,%YMM5,%YMM12 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDPD %YMM6,%YMM8,%YMM8 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VSCATTERDPD %YMM8,(%RSI,%XMM19,8){%K2} | 19 | 0 | 0 | 2 | 2 | 4 | 1 | 0 | 0 | 13 | 9 |
VSCATTERDPD %YMM12,(%RSI,%XMM4,8){%K3} | 19 | 0 | 0 | 2 | 2 | 4 | 1 | 0 | 0 | 13 | 9 |
CMP %R8,%R10 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JNE 445978 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
Function | miniqmcreference::TwoBodyJastrowRef |
Source file and lines | BsplineFunctor.h:305-336 |
Module | exec |
nb instructions | 191 |
nb uops | 326 |
loop length | 1154 |
used x86 registers | 9 |
used mmx registers | 0 |
used xmm registers | 10 |
used ymm registers | 32 |
used zmm registers | 0 |
nb stack references | 30 |
ADD-SUB / MUL ratio | 0.50 |
micro-operation queue | 81.50 cycles |
front end | 81.50 cycles |
Port | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 |
---|---|---|---|---|---|---|---|---|
uops | 65.00 | 65.00 | 47.50 | 47.50 | 24.00 | 25.00 | 2.00 | 0.00 |
cycles | 65.00 | 65.00 | 47.50 | 47.50 | 24.00 | 25.00 | 2.00 | 0.00 |
Cycles executing div or sqrt instructions | 16.00 |
Longest recurrence chain latency (RecMII) | 20.00 |
FE+BE cycles | 82.02-82.25 |
Stall cycles | 29.50-29.73 |
RS full (events) | 4.82-4.52 |
PRF_FLOAT full (events) | 31.92-32.23 |
Front-end | 81.50 |
Dispatch | 65.00 |
DIV/SQRT | 16.00 |
Data deps. | 20.00 |
Overall L1 | 81.50 |
all | 100% |
load | 100% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 100% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 100% |
all | 99% |
load | 97% |
store | 100% |
mul | 100% |
add-sub | 100% |
fma | 100% |
div/sqrt | 100% |
other | 96% |
all | 99% |
load | 97% |
store | 100% |
mul | 100% |
add-sub | 100% |
fma | 100% |
div/sqrt | 100% |
other | 96% |
all | 34% |
load | 50% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 50% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 25% |
all | 49% |
load | 49% |
store | 50% |
mul | 50% |
add-sub | 50% |
fma | 50% |
div/sqrt | 50% |
other | 48% |
all | 48% |
load | 49% |
store | 50% |
mul | 50% |
add-sub | 50% |
fma | 50% |
div/sqrt | 50% |
other | 43% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|
VMOVAPD (%R12,%R8,2),%YMM11 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VMOVAPD 0x20(%R12,%R8,2),%YMM10 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
KMOVB %K1,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
KMOVB %K1,%K5 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
KMOVB %K1,%K4 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
KMOVB %K1,%K6 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VMOVAPD 0x500(%RSP),%YMM9 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
KMOVB %K1,%K3 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VMULPD %YMM18,%YMM11,%YMM15 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
KMOVB %K1,%K7 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VMOVAPD 0x460(%RSP),%YMM24 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VMOVDQA (%R15,%R8,1),%YMM4 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 0.50 |
VMULPD %YMM18,%YMM10,%YMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD %YMM9,%YMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
ADD $0x20,%R8 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VRNDSCALEPD $0xb,%YMM15,%YMM7 | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 8 | 1 |
VCVTTPD2DQ %YMM15,%XMM5 | 2 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0 | 0 | 7 | 1 |
VGATHERDPD (%RAX,%XMM5,8),%YMM22{%K2} | 4 | 1 | 0 | 2 | 2 | 0 | 1 | 0 | 0 | 20 | 4 |
KMOVB %K1,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VRNDSCALEPD $0xb,%YMM0,%YMM13 | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 8 | 1 |
VCVTTPD2DQ %YMM0,%XMM2 | 2 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0 | 0 | 7 | 1 |
VINSERTI128 $0x1,%XMM2,%YMM5,%YMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VPADDD 0x8a0e7(%RIP),%YMM6,%YMM8 | 1 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0.33 | 0 | 0 | 1 | 0.50 |
VPERM2I128 $0x11,%YMM6,%YMM6,%YMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VGATHERDPD (%RAX,%XMM12,8),%YMM21{%K3} | 4 | 1 | 0 | 2 | 2 | 0 | 1 | 0 | 0 | 20 | 4 |
KMOVB %K1,%K3 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VGATHERDPD (%RAX,%XMM8,8),%YMM23{%K4} | 4 | 1 | 0 | 2 | 2 | 0 | 1 | 0 | 0 | 20 | 4 |
KMOVB %K1,%K4 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VSUBPD %YMM7,%YMM15,%YMM1 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VPADDD 0x8a0df(%RIP),%YMM6,%YMM15 | 1 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0.33 | 0 | 0 | 1 | 0.50 |
VPERM2I128 $0x11,%YMM8,%YMM8,%YMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VPADDD 0x8a0f1(%RIP),%YMM6,%YMM6 | 1 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0.33 | 0 | 0 | 1 | 0.50 |
VSUBPD %YMM13,%YMM0,%YMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VGATHERDPD (%RAX,%XMM7,8),%YMM5{%K5} | 4 | 1 | 0 | 2 | 2 | 0 | 1 | 0 | 0 | 20 | 4 |
KMOVB %K1,%K5 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VGATHERDPD (%RAX,%XMM15,8),%YMM8{%K6} | 4 | 1 | 0 | 2 | 2 | 0 | 1 | 0 | 0 | 20 | 4 |
VPERM2I128 $0x11,%YMM15,%YMM15,%YMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VMOVAPD 0x520(%RSP),%YMM15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
KMOVB %K1,%K6 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VGATHERDPD (%RAX,%XMM6,8),%YMM20{%K2} | 4 | 1 | 0 | 2 | 2 | 0 | 1 | 0 | 0 | 20 | 4 |
VPERM2I128 $0x11,%YMM6,%YMM6,%YMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VMOVAPD 0x540(%RSP),%YMM6 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VGATHERDPD (%RAX,%XMM13,8),%YMM12{%K7} | 4 | 1 | 0 | 2 | 2 | 0 | 1 | 0 | 0 | 20 | 4 |
VMULPD %YMM1,%YMM1,%YMM2 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VGATHERDPD (%RAX,%XMM7,8),%YMM13{%K3} | 4 | 1 | 0 | 2 | 2 | 0 | 1 | 0 | 0 | 20 | 4 |
KMOVB %K1,%K7 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
KMOVB %K1,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VMULPD %YMM0,%YMM0,%YMM3 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD %YMM6,%YMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
KMOVB %K1,%K3 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VFMADD132PD %YMM1,%YMM15,%YMM7 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %YMM0,%YMM15,%YMM6 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD 0x4e0(%RSP),%YMM15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VFMADD132PD %YMM1,%YMM15,%YMM14 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %YMM0,%YMM15,%YMM9 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD 0x4c0(%RSP),%YMM15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VMULPD %YMM2,%YMM1,%YMM17 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM3,%YMM0,%YMM16 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD %YMM15,%YMM19 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMULPD %YMM9,%YMM5,%YMM9 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM14,%YMM23,%YMM14 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %YMM21,%YMM9,%YMM6 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD 0x480(%RSP),%YMM9 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VFMADD132PD %YMM22,%YMM14,%YMM7 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD 0x4a0(%RSP),%YMM14 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VFMADD132PD %YMM1,%YMM14,%YMM19 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %YMM0,%YMM14,%YMM15 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD %YMM9,%YMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VFMADD132PD %YMM1,%YMM24,%YMM14 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %YMM0,%YMM24,%YMM9 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM14,%YMM20,%YMM14 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM9,%YMM13,%YMM9 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM19,%YMM8,%YMM14 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVDQA32 %XMM4,%XMM19 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VFMADD231PD %YMM15,%YMM12,%YMM9 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD 0x440(%RSP),%YMM15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VADDPD %YMM14,%YMM7,%YMM7 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDPD %YMM9,%YMM6,%YMM6 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM15,%YMM7,%YMM14 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VBROADCASTSD 0x89a21(%RIP),%YMM7 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 0.50 |
VMULPD %YMM15,%YMM6,%YMM9 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VDIVPD %YMM11,%YMM7,%YMM11 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 13-14 | 8 |
VSCATTERDPD %YMM14,(%R9,%XMM4,8){%K4} | 19 | 0 | 0 | 2 | 2 | 4 | 1 | 0 | 0 | 13 | 9 |
VPERM2I128 $0x11,%YMM4,%YMM4,%YMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VSCATTERDPD %YMM9,(%R9,%XMM4,8){%K5} | 19 | 0 | 0 | 2 | 2 | 4 | 1 | 0 | 0 | 13 | 9 |
VMOVAPD 0x400(%RSP),%YMM6 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VMOVAPD 0x3e0(%RSP),%YMM15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VMOVAPD 0x420(%RSP),%YMM14 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VMOVAPD 0x3a0(%RSP),%YMM9 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VDIVPD %YMM10,%YMM7,%YMM10 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 13-14 | 8 |
VMOVAPD %YMM6,%YMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VFMADD132PD %YMM1,%YMM15,%YMM7 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %YMM0,%YMM15,%YMM6 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD 0x380(%RSP),%YMM15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VFMADD231PD %YMM14,%YMM2,%YMM7 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM14,%YMM3,%YMM6 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD %YMM9,%YMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VFMADD132PD %YMM1,%YMM15,%YMM14 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %YMM0,%YMM15,%YMM9 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD 0x3c0(%RSP),%YMM15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VMULPD %YMM18,%YMM11,%YMM11 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM15,%YMM2,%YMM14 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM15,%YMM3,%YMM9 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM14,%YMM23,%YMM14 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM9,%YMM5,%YMM9 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %YMM22,%YMM14,%YMM7 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD 0x340(%RSP),%YMM14 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VMOVAPD 0x320(%RSP),%YMM15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VFMADD132PD %YMM21,%YMM9,%YMM6 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD 0x360(%RSP),%YMM9 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VMOVAPD %YMM14,%YMM24 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VFMADD132PD %YMM0,%YMM15,%YMM14 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %YMM1,%YMM15,%YMM24 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD 0x300(%RSP),%YMM15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VMULPD %YMM18,%YMM10,%YMM10 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM9,%YMM3,%YMM14 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM9,%YMM2,%YMM24 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD %YMM15,%YMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VFMADD213PD 0x580(%RSP),%YMM1,%YMM9 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD213PD 0x580(%RSP),%YMM0,%YMM15 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD 0x5a0(%RSP),%YMM2,%YMM9 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD 0x5a0(%RSP),%YMM3,%YMM15 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM9,%YMM20,%YMM9 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM15,%YMM13,%YMM15 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM24,%YMM8,%YMM9 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %YMM12,%YMM15,%YMM14 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDPD %YMM9,%YMM7,%YMM7 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDPD %YMM14,%YMM6,%YMM6 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM11,%YMM7,%YMM11 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM10,%YMM6,%YMM10 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VSCATTERDPD %YMM11,(%R11,%XMM19,8){%K6} | 19 | 0 | 0 | 2 | 2 | 4 | 1 | 0 | 0 | 13 | 9 |
VSCATTERDPD %YMM10,(%R11,%XMM4,8){%K7} | 19 | 0 | 0 | 2 | 2 | 4 | 1 | 0 | 0 | 13 | 9 |
VMOVAPD 0x2c0(%RSP),%YMM14 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VMOVAPD 0x2a0(%RSP),%YMM6 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VMOVAPD 0x2e0(%RSP),%YMM9 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VMOVAPD 0x280(%RSP),%YMM11 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VMULPD %YMM14,%YMM2,%YMM15 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD %YMM6,%YMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMULPD %YMM14,%YMM3,%YMM7 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %YMM1,%YMM11,%YMM10 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD 0x240(%RSP),%YMM14 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VFMADD132PD %YMM0,%YMM11,%YMM6 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM14,%YMM2,%YMM11 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM14,%YMM3,%YMM14 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM9,%YMM17,%YMM15 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM9,%YMM16,%YMM7 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD 0x220(%RSP),%YMM9 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VADDPD %YMM15,%YMM10,%YMM10 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD 0x260(%RSP),%YMM15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VADDPD %YMM6,%YMM7,%YMM7 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD %YMM9,%YMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VFMADD231PD %YMM15,%YMM17,%YMM11 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM15,%YMM16,%YMM14 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD 0x200(%RSP),%YMM15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VFMADD132PD %YMM1,%YMM15,%YMM6 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %YMM0,%YMM15,%YMM9 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD %YMM1,%YMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VFMADD132PD %YMM26,%YMM25,%YMM1 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %YMM29,%YMM28,%YMM15 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDPD %YMM14,%YMM9,%YMM14 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDPD %YMM11,%YMM6,%YMM11 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD %YMM0,%YMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VFMADD132PD %YMM26,%YMM25,%YMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %YMM29,%YMM28,%YMM9 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM23,%YMM11,%YMM6 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD 0x1e0(%RSP),%YMM11 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VMULPD %YMM14,%YMM5,%YMM5 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM10,%YMM22,%YMM6 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM30,%YMM3,%YMM10 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM7,%YMM21,%YMM5 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM27,%YMM3,%YMM3 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM30,%YMM2,%YMM7 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM27,%YMM2,%YMM2 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM31,%YMM16,%YMM10 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM11,%YMM16,%YMM3 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM31,%YMM17,%YMM7 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM11,%YMM17,%YMM2 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDPD %YMM3,%YMM0,%YMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDPD %YMM15,%YMM7,%YMM14 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDPD %YMM9,%YMM10,%YMM7 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDPD %YMM1,%YMM2,%YMM1 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM0,%YMM13,%YMM13 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM20,%YMM1,%YMM10 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %YMM7,%YMM13,%YMM12 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %YMM14,%YMM10,%YMM8 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDPD %YMM12,%YMM5,%YMM12 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDPD %YMM6,%YMM8,%YMM8 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VSCATTERDPD %YMM8,(%RSI,%XMM19,8){%K2} | 19 | 0 | 0 | 2 | 2 | 4 | 1 | 0 | 0 | 13 | 9 |
VSCATTERDPD %YMM12,(%RSI,%XMM4,8){%K3} | 19 | 0 | 0 | 2 | 2 | 4 | 1 | 0 | 0 | 13 | 9 |
CMP %R8,%R10 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JNE 445978 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |