Loop Id: 371 | Module: libqmcwfs.so | Source: BsplineFunctor.h:305-336 | Coverage: 0.37% |
---|
Loop Id: 371 | Module: libqmcwfs.so | Source: BsplineFunctor.h:305-336 | Coverage: 0.37% |
---|
0x3bc30 VMOVAPD (%RSI,%RDX,2),%ZMM11 [5] |
0x3bc37 VMOVAPD 0x40(%RSI,%RDX,2),%ZMM10 [5] |
0x3bc3f KMOVB %K1,%K2 |
0x3bc43 KMOVB %K1,%K5 |
0x3bc47 KMOVB %K1,%K3 |
0x3bc4b KMOVB %K1,%K6 |
0x3bc4f VMOVAPD 0x8c0(%RSP),%ZMM9 [16] |
0x3bc57 KMOVB %K1,%K7 |
0x3bc5b VMULPD %ZMM18,%ZMM11,%ZMM0 |
0x3bc61 KMOVB %K1,%K4 |
0x3bc65 VMOVAPD 0x780(%RSP),%ZMM24 [16] |
0x3bc6d VMOVDQA32 (%RDI,%RDX,1),%ZMM7 [1] |
0x3bc74 VMULPD %ZMM18,%ZMM10,%ZMM1 |
0x3bc7a ADD $0x40,%RDX |
0x3bc7e VRNDSCALEPD $0xb,%ZMM0,%ZMM3 |
0x3bc85 VCVTTPD2DQ %ZMM0,%YMM8 |
0x3bc8b VGATHERDPD (%RAX,%YMM8,8),%ZMM22{%K2} [14] |
0x3bc92 KMOVB %K1,%K2 |
0x3bc96 VRNDSCALEPD $0xb,%ZMM1,%ZMM12 |
0x3bc9d VCVTTPD2DQ %ZMM1,%YMM2 |
0x3bca3 VINSERTI64X4 $0x1,%YMM2,%ZMM8,%ZMM5 |
0x3bcaa VPADDD 0x2b90c(%RIP),%ZMM5,%ZMM14 [12] |
0x3bcb4 VPADDD 0x2b942(%RIP),%ZMM5,%ZMM15 [12] |
0x3bcbe VSHUFI32X4 $-0x12,%ZMM5,%ZMM5,%ZMM13 |
0x3bcc5 VPADDD 0x2b971(%RIP),%ZMM5,%ZMM5 [12] |
0x3bccf VGATHERDPD (%RAX,%YMM13,8),%ZMM21{%K3} [11] |
0x3bcd6 KMOVB %K1,%K3 |
0x3bcda VSHUFI32X4 $-0x12,%ZMM14,%ZMM14,%ZMM6 |
0x3bce1 VGATHERDPD (%RAX,%YMM15,8),%ZMM8{%K6} [17] |
0x3bce8 VSHUFI32X4 $-0x12,%ZMM15,%ZMM15,%ZMM13 |
0x3bcef VMOVAPD 0x900(%RSP),%ZMM15 [16] |
0x3bcf7 VGATHERDPD (%RAX,%YMM6,8),%ZMM4{%K5} [7] |
0x3bcfe VGATHERDPD (%RAX,%YMM5,8),%ZMM20{%K2} [2] |
0x3bd05 VSHUFI32X4 $-0x12,%ZMM5,%ZMM5,%ZMM6 |
0x3bd0c VMOVAPD 0x940(%RSP),%ZMM5 [16] |
0x3bd14 VSUBPD %ZMM3,%ZMM0,%ZMM0 |
0x3bd1a VGATHERDPD (%RAX,%YMM14,8),%ZMM23{%K4} [8] |
0x3bd21 VMOVAPD %ZMM9,%ZMM14 |
0x3bd27 KMOVB %K1,%K4 |
0x3bd2b VSUBPD %ZMM12,%ZMM1,%ZMM1 |
0x3bd31 VGATHERDPD (%RAX,%YMM13,8),%ZMM12{%K7} [10] |
0x3bd38 VGATHERDPD (%RAX,%YMM6,8),%ZMM13{%K3} [6] |
0x3bd3f KMOVB %K1,%K5 |
0x3bd43 VMOVAPD %ZMM5,%ZMM6 |
0x3bd49 KMOVB %K1,%K6 |
0x3bd4d KMOVB %K1,%K7 |
0x3bd51 KMOVB %K1,%K2 |
0x3bd55 VMULPD %ZMM0,%ZMM0,%ZMM3 |
0x3bd5b VFMADD132PD %ZMM0,%ZMM15,%ZMM6 |
0x3bd61 KMOVB %K1,%K3 |
0x3bd65 VMULPD %ZMM1,%ZMM1,%ZMM2 |
0x3bd6b VFMADD132PD %ZMM1,%ZMM15,%ZMM5 |
0x3bd71 VMOVAPD 0x880(%RSP),%ZMM15 [16] |
0x3bd79 VFMADD132PD %ZMM0,%ZMM15,%ZMM14 |
0x3bd7f VFMADD132PD %ZMM1,%ZMM15,%ZMM9 |
0x3bd85 VMOVAPD 0x840(%RSP),%ZMM15 [16] |
0x3bd8d VMULPD %ZMM3,%ZMM0,%ZMM17 |
0x3bd93 VMOVAPD %ZMM15,%ZMM19 |
0x3bd99 VMULPD %ZMM2,%ZMM1,%ZMM16 |
0x3bd9f VMULPD %ZMM9,%ZMM4,%ZMM9 |
0x3bda5 VMULPD %ZMM14,%ZMM23,%ZMM14 |
0x3bdab VFMADD132PD %ZMM21,%ZMM9,%ZMM5 |
0x3bdb1 VMOVAPD 0x7c0(%RSP),%ZMM9 [16] |
0x3bdb9 VFMADD132PD %ZMM22,%ZMM14,%ZMM6 |
0x3bdbf VMOVAPD 0x800(%RSP),%ZMM14 [16] |
0x3bdc7 VFMADD132PD %ZMM0,%ZMM14,%ZMM19 |
0x3bdcd VFMADD132PD %ZMM1,%ZMM14,%ZMM15 |
0x3bdd3 VMOVAPD %ZMM9,%ZMM14 |
0x3bdd9 VFMADD132PD %ZMM0,%ZMM24,%ZMM14 |
0x3bddf VFMADD132PD %ZMM1,%ZMM24,%ZMM9 |
0x3bde5 VMULPD %ZMM14,%ZMM20,%ZMM14 |
0x3bdeb VMULPD %ZMM9,%ZMM13,%ZMM9 |
0x3bdf1 VFMADD231PD %ZMM19,%ZMM8,%ZMM14 |
0x3bdf7 VMOVDQA32 %YMM7,%YMM19 |
0x3bdfd VFMADD231PD %ZMM15,%ZMM12,%ZMM9 |
0x3be03 VMOVAPD 0x740(%RSP),%ZMM15 [16] |
0x3be0b VADDPD %ZMM14,%ZMM6,%ZMM6 |
0x3be11 VADDPD %ZMM9,%ZMM5,%ZMM5 |
0x3be17 VMULPD %ZMM15,%ZMM6,%ZMM14 |
0x3be1d VBROADCASTSD 0x2b619(%RIP),%ZMM6 [12] |
0x3be27 VMULPD %ZMM15,%ZMM5,%ZMM9 |
0x3be2d VDIVPD %ZMM11,%ZMM6,%ZMM11 |
0x3be33 VDIVPD %ZMM10,%ZMM6,%ZMM10 |
0x3be39 VMULPD %ZMM18,%ZMM11,%ZMM11 |
0x3be3f VMULPD %ZMM18,%ZMM10,%ZMM10 |
0x3be45 VSCATTERDPD %ZMM14,(%R11,%YMM7,8){%K4} [9] |
0x3be4c VSHUFI32X4 $-0x12,%ZMM7,%ZMM7,%ZMM7 |
0x3be53 VSCATTERDPD %ZMM9,(%R11,%YMM7,8){%K5} [4] |
0x3be5a VMOVAPD 0x6c0(%RSP),%ZMM5 [16] |
0x3be62 VMOVAPD 0x680(%RSP),%ZMM15 [16] |
0x3be6a VMOVAPD 0x700(%RSP),%ZMM14 [16] |
0x3be72 VMOVAPD 0x600(%RSP),%ZMM9 [16] |
0x3be7a VMOVAPD %ZMM5,%ZMM6 |
0x3be80 VFMADD132PD %ZMM1,%ZMM15,%ZMM5 |
0x3be86 VFMADD132PD %ZMM0,%ZMM15,%ZMM6 |
0x3be8c VMOVAPD 0x5c0(%RSP),%ZMM15 [16] |
0x3be94 VFMADD231PD %ZMM14,%ZMM2,%ZMM5 |
0x3be9a VFMADD231PD %ZMM14,%ZMM3,%ZMM6 |
0x3bea0 VMOVAPD %ZMM9,%ZMM14 |
0x3bea6 VFMADD132PD %ZMM0,%ZMM15,%ZMM14 |
0x3beac VFMADD132PD %ZMM1,%ZMM15,%ZMM9 |
0x3beb2 VMOVAPD 0x640(%RSP),%ZMM15 [16] |
0x3beba VFMADD231PD %ZMM15,%ZMM3,%ZMM14 |
0x3bec0 VFMADD231PD %ZMM15,%ZMM2,%ZMM9 |
0x3bec6 VMULPD %ZMM14,%ZMM23,%ZMM14 |
0x3becc VMULPD %ZMM9,%ZMM4,%ZMM9 |
0x3bed2 VFMADD132PD %ZMM22,%ZMM14,%ZMM6 |
0x3bed8 VMOVAPD 0x540(%RSP),%ZMM14 [16] |
0x3bee0 VMOVAPD 0x500(%RSP),%ZMM15 [16] |
0x3bee8 VFMADD132PD %ZMM21,%ZMM9,%ZMM5 |
0x3beee VMOVAPD 0x580(%RSP),%ZMM9 [16] |
0x3bef6 VMOVAPD %ZMM14,%ZMM24 |
0x3befc VFMADD132PD %ZMM1,%ZMM15,%ZMM14 |
0x3bf02 VFMADD132PD %ZMM0,%ZMM15,%ZMM24 |
0x3bf08 VMOVAPD 0x4c0(%RSP),%ZMM15 [16] |
0x3bf10 VFMADD231PD %ZMM9,%ZMM2,%ZMM14 |
0x3bf16 VFMADD231PD %ZMM9,%ZMM3,%ZMM24 |
0x3bf1c VMOVAPD %ZMM15,%ZMM9 |
0x3bf22 VFMADD213PD 0x980(%RSP),%ZMM0,%ZMM9 [16] |
0x3bf2a VFMADD213PD 0x980(%RSP),%ZMM1,%ZMM15 [16] |
0x3bf32 VFMADD231PD 0x9c0(%RSP),%ZMM3,%ZMM9 [16] |
0x3bf3a VFMADD231PD 0x9c0(%RSP),%ZMM2,%ZMM15 [16] |
0x3bf42 VMULPD %ZMM9,%ZMM20,%ZMM9 |
0x3bf48 VMULPD %ZMM15,%ZMM13,%ZMM15 |
0x3bf4e VFMADD231PD %ZMM24,%ZMM8,%ZMM9 |
0x3bf54 VFMADD132PD %ZMM12,%ZMM15,%ZMM14 |
0x3bf5a VADDPD %ZMM9,%ZMM6,%ZMM6 |
0x3bf60 VADDPD %ZMM14,%ZMM5,%ZMM5 |
0x3bf66 VMULPD %ZMM11,%ZMM6,%ZMM11 |
0x3bf6c VMULPD %ZMM10,%ZMM5,%ZMM10 |
0x3bf72 VSCATTERDPD %ZMM11,(%R10,%YMM19,8){%K6} [13] |
0x3bf79 VSCATTERDPD %ZMM10,(%R10,%YMM7,8){%K7} [3] |
0x3bf80 VMOVAPD 0x440(%RSP),%ZMM14 [16] |
0x3bf88 VMOVAPD 0x400(%RSP),%ZMM6 [16] |
0x3bf90 VMOVAPD 0x480(%RSP),%ZMM15 [16] |
0x3bf98 VMOVAPD 0x3c0(%RSP),%ZMM11 [16] |
0x3bfa0 VMULPD %ZMM14,%ZMM3,%ZMM9 |
0x3bfa6 VMOVAPD %ZMM6,%ZMM10 |
0x3bfac VMULPD %ZMM14,%ZMM2,%ZMM5 |
0x3bfb2 VFMADD132PD %ZMM0,%ZMM11,%ZMM10 |
0x3bfb8 VMOVAPD 0x340(%RSP),%ZMM14 [16] |
0x3bfc0 VFMADD132PD %ZMM1,%ZMM11,%ZMM6 |
0x3bfc6 VMULPD %ZMM14,%ZMM3,%ZMM11 |
0x3bfcc VMULPD %ZMM14,%ZMM2,%ZMM14 |
0x3bfd2 VFMADD231PD %ZMM15,%ZMM17,%ZMM9 |
0x3bfd8 VFMADD231PD %ZMM15,%ZMM16,%ZMM5 |
0x3bfde VMOVAPD 0x280(%RSP),%ZMM15 [16] |
0x3bfe6 VADDPD %ZMM9,%ZMM10,%ZMM10 |
0x3bfec VMOVAPD 0x380(%RSP),%ZMM9 [16] |
0x3bff4 VADDPD %ZMM5,%ZMM6,%ZMM6 |
0x3bffa VFMADD231PD %ZMM9,%ZMM17,%ZMM11 |
0x3c000 VFMADD231PD %ZMM9,%ZMM16,%ZMM14 |
0x3c006 VMOVAPD 0x300(%RSP),%ZMM9 [16] |
0x3c00e VMOVAPD %ZMM9,%ZMM5 |
0x3c014 VFMADD132PD %ZMM1,%ZMM15,%ZMM9 |
0x3c01a VFMADD132PD %ZMM0,%ZMM15,%ZMM5 |
0x3c020 VADDPD %ZMM14,%ZMM9,%ZMM14 |
0x3c026 VMOVAPD %ZMM0,%ZMM9 |
0x3c02c VFMADD132PD %ZMM26,%ZMM25,%ZMM0 |
0x3c032 VFMADD132PD %ZMM30,%ZMM29,%ZMM9 |
0x3c038 VADDPD %ZMM11,%ZMM5,%ZMM11 |
0x3c03e VMULPD %ZMM14,%ZMM4,%ZMM4 |
0x3c044 VMOVAPD %ZMM1,%ZMM14 |
0x3c04a VMULPD %ZMM23,%ZMM11,%ZMM5 |
0x3c050 VFMADD132PD %ZMM26,%ZMM25,%ZMM1 |
0x3c056 VFMADD132PD %ZMM30,%ZMM29,%ZMM14 |
0x3c05c VFMADD231PD %ZMM6,%ZMM21,%ZMM4 |
0x3c062 VMOVAPD 0x2c0(%RSP),%ZMM6 [16] |
0x3c06a VFMADD231PD %ZMM10,%ZMM22,%ZMM5 |
0x3c070 VMULPD %ZMM6,%ZMM2,%ZMM15 |
0x3c076 VMULPD %ZMM6,%ZMM3,%ZMM10 |
0x3c07c VMULPD %ZMM27,%ZMM2,%ZMM2 |
0x3c082 VMULPD %ZMM27,%ZMM3,%ZMM3 |
0x3c088 VFMADD231PD %ZMM31,%ZMM16,%ZMM15 |
0x3c08e VFMADD231PD %ZMM31,%ZMM17,%ZMM10 |
0x3c094 VFMADD231PD %ZMM28,%ZMM16,%ZMM2 |
0x3c09a VFMADD231PD %ZMM28,%ZMM17,%ZMM3 |
0x3c0a0 VADDPD %ZMM15,%ZMM14,%ZMM6 |
0x3c0a6 VADDPD %ZMM10,%ZMM9,%ZMM11 |
0x3c0ac VADDPD %ZMM1,%ZMM2,%ZMM1 |
0x3c0b2 VADDPD %ZMM3,%ZMM0,%ZMM0 |
0x3c0b8 VMULPD %ZMM1,%ZMM13,%ZMM13 |
0x3c0be VMULPD %ZMM20,%ZMM0,%ZMM10 |
0x3c0c4 VFMADD132PD %ZMM6,%ZMM13,%ZMM12 |
0x3c0ca VFMADD132PD %ZMM11,%ZMM10,%ZMM8 |
0x3c0d0 VADDPD %ZMM12,%ZMM4,%ZMM12 |
0x3c0d6 VADDPD %ZMM5,%ZMM8,%ZMM8 |
0x3c0dc VSCATTERDPD %ZMM8,(%R9,%YMM19,8){%K2} [18] |
0x3c0e3 VSCATTERDPD %ZMM12,(%R9,%YMM7,8){%K3} [15] |
0x3c0ea CMP %R13,%RDX |
0x3c0ed JNE 3bc30 |
/home/kcamus/qaas_runs/169-390-4082/intel/miniqmc/build/miniqmc/src/QMCWaveFunctions/Jastrow/BsplineFunctor.h: 305 - 336 |
-------------------------------------------------------------------------------- |
305: real_type r = distArrayCompressed[j]; |
306: int iScatter = distIndices[j]; |
307: real_type rinv = cOne / r; |
308: r *= DeltaRInv; |
309: int iGather = (int)r; |
310: real_type t = r - real_type(iGather); |
311: real_type tp0 = t * t * t; |
312: real_type tp1 = t * t; |
313: real_type tp2 = t; |
314: |
315: real_type sCoef0 = SplineCoefs[iGather + 0]; |
316: real_type sCoef1 = SplineCoefs[iGather + 1]; |
317: real_type sCoef2 = SplineCoefs[iGather + 2]; |
318: real_type sCoef3 = SplineCoefs[iGather + 3]; |
319: |
320: // clang-format off |
321: laplArray[iScatter] = dSquareDeltaRinv * |
322: (sCoef0*( d2A[ 2]*tp2 + d2A[ 3])+ |
323: sCoef1*( d2A[ 6]*tp2 + d2A[ 7])+ |
324: sCoef2*( d2A[10]*tp2 + d2A[11])+ |
325: sCoef3*( d2A[14]*tp2 + d2A[15])); |
326: |
327: gradArray[iScatter] = DeltaRInv * rinv * |
328: (sCoef0*( dA[ 1]*tp1 + dA[ 2]*tp2 + dA[ 3])+ |
329: sCoef1*( dA[ 5]*tp1 + dA[ 6]*tp2 + dA[ 7])+ |
330: sCoef2*( dA[ 9]*tp1 + dA[10]*tp2 + dA[11])+ |
331: sCoef3*( dA[13]*tp1 + dA[14]*tp2 + dA[15])); |
332: |
333: valArray[iScatter] = (sCoef0*(A[ 0]*tp0 + A[ 1]*tp1 + A[ 2]*tp2 + A[ 3])+ |
334: sCoef1*(A[ 4]*tp0 + A[ 5]*tp1 + A[ 6]*tp2 + A[ 7])+ |
335: sCoef2*(A[ 8]*tp0 + A[ 9]*tp1 + A[10]*tp2 + A[11])+ |
336: sCoef3*(A[12]*tp0 + A[13]*tp1 + A[14]*tp2 + A[15])); |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►100.00+ | miniqmcreference::TwoBodyJastr[...] | TwoBodyJastrowRef.h:300 | libqmcwfs.so |
○ | qmcplusplus::WaveFunction::rat[...] | WaveFunction.cpp:207 | libqmcwfs.so |
○ | main._omp_fn.1 | stl_vector.h:1123 | exec |
○ | GOMP_parallel | libgomp.h:985 | libgomp.so.1.0.0 |
Path / |
Metric | Value |
---|---|
CQA speedup if no scalar integer | 1.04 |
CQA speedup if FP arith vectorized | 1.00 |
CQA speedup if fully vectorized | 1.02 |
CQA speedup if no inter-iteration dependency | NA |
CQA speedup if next bottleneck killed | 1.19 |
Bottlenecks | micro-operation queue, |
Function | miniqmcreference::TwoBodyJastrowRef |
Source | BsplineFunctor.h:305-336 |
Source loop unroll info | unrolled by 16 |
Source loop unroll confidence level | high |
Unroll/vectorization loop type | main |
Unroll factor | 16 |
CQA cycles | 94.50 |
CQA cycles if no scalar integer | 91.00 |
CQA cycles if FP arith vectorized | 94.50 |
CQA cycles if fully vectorized | 92.58 |
Front-end cycles | 94.50 |
DIV/SQRT cycles | 32.00 |
P0 cycles | 79.50 |
P1 cycles | 50.00 |
P2 cycles | 75.50 |
P3 cycles | 75.50 |
P4 cycles | 48.00 |
P5 cycles | 79.50 |
P6 cycles | 1.00 |
P7 cycles | 0.00 |
Inter-iter dependencies cycles | 29 |
FE+BE cycles (UFS) | 91.13 |
Stall cycles (UFS) | 37.63 |
Nb insns | 191.00 |
Nb uops | 378.00 |
Nb loads | 47.00 |
Nb stores | 6.00 |
Nb stack references | 30.00 |
FLOP/cycle | 13.04 |
Nb FLOP add-sub | 128.00 |
Nb FLOP mul | 256.00 |
Nb FLOP fma | 416.00 |
Nb FLOP div | 16.00 |
Nb FLOP rcp | 0.00 |
Nb FLOP sqrt | 0.00 |
Nb FLOP rsqrt | 0.00 |
Bytes/cycle | 35.30 |
Bytes prefetched | 0.00 |
Bytes loaded | 2952.00 |
Bytes stored | 384.00 |
Stride 0 | 10.00 |
Stride 1 | 2.00 |
Stride n | 0.00 |
Stride unknown | 0.00 |
Stride indirect | 3.00 |
Vectorization ratio all | 99.43 |
Vectorization ratio load | 97.87 |
Vectorization ratio store | 100.00 |
Vectorization ratio mul | 100.00 |
Vectorization ratio add_sub | 100.00 |
Vectorization ratio fma | 100.00 |
Vectorization ratio div_sqrt | 100.00 |
Vectorization ratio other | 96.88 |
Vector-efficiency ratio all | 97.49 |
Vector-efficiency ratio load | 98.14 |
Vector-efficiency ratio store | 100.00 |
Vector-efficiency ratio mul | 100.00 |
Vector-efficiency ratio add_sub | 100.00 |
Vector-efficiency ratio fma | 100.00 |
Vector-efficiency ratio div_sqrt | 100.00 |
Vector-efficiency ratio other | 86.33 |
Metric | Value |
---|---|
CQA speedup if no scalar integer | 1.04 |
CQA speedup if FP arith vectorized | 1.00 |
CQA speedup if fully vectorized | 1.02 |
CQA speedup if no inter-iteration dependency | NA |
CQA speedup if next bottleneck killed | 1.19 |
Bottlenecks | micro-operation queue, |
Function | miniqmcreference::TwoBodyJastrowRef |
Source | BsplineFunctor.h:305-336 |
Source loop unroll info | unrolled by 16 |
Source loop unroll confidence level | high |
Unroll/vectorization loop type | main |
Unroll factor | 16 |
CQA cycles | 94.50 |
CQA cycles if no scalar integer | 91.00 |
CQA cycles if FP arith vectorized | 94.50 |
CQA cycles if fully vectorized | 92.58 |
Front-end cycles | 94.50 |
DIV/SQRT cycles | 32.00 |
P0 cycles | 79.50 |
P1 cycles | 50.00 |
P2 cycles | 75.50 |
P3 cycles | 75.50 |
P4 cycles | 48.00 |
P5 cycles | 79.50 |
P6 cycles | 1.00 |
P7 cycles | 0.00 |
Inter-iter dependencies cycles | 29 |
FE+BE cycles (UFS) | 91.13 |
Stall cycles (UFS) | 37.63 |
Nb insns | 191.00 |
Nb uops | 378.00 |
Nb loads | 47.00 |
Nb stores | 6.00 |
Nb stack references | 30.00 |
FLOP/cycle | 13.04 |
Nb FLOP add-sub | 128.00 |
Nb FLOP mul | 256.00 |
Nb FLOP fma | 416.00 |
Nb FLOP div | 16.00 |
Nb FLOP rcp | 0.00 |
Nb FLOP sqrt | 0.00 |
Nb FLOP rsqrt | 0.00 |
Bytes/cycle | 35.30 |
Bytes prefetched | 0.00 |
Bytes loaded | 2952.00 |
Bytes stored | 384.00 |
Stride 0 | 10.00 |
Stride 1 | 2.00 |
Stride n | 0.00 |
Stride unknown | 0.00 |
Stride indirect | 3.00 |
Vectorization ratio all | 99.43 |
Vectorization ratio load | 97.87 |
Vectorization ratio store | 100.00 |
Vectorization ratio mul | 100.00 |
Vectorization ratio add_sub | 100.00 |
Vectorization ratio fma | 100.00 |
Vectorization ratio div_sqrt | 100.00 |
Vectorization ratio other | 96.88 |
Vector-efficiency ratio all | 97.49 |
Vector-efficiency ratio load | 98.14 |
Vector-efficiency ratio store | 100.00 |
Vector-efficiency ratio mul | 100.00 |
Vector-efficiency ratio add_sub | 100.00 |
Vector-efficiency ratio fma | 100.00 |
Vector-efficiency ratio div_sqrt | 100.00 |
Vector-efficiency ratio other | 86.33 |
Path / |
Function | miniqmcreference::TwoBodyJastrowRef |
Source file and lines | BsplineFunctor.h:305-336 |
Module | libqmcwfs.so |
nb instructions | 191 |
nb uops | 378 |
loop length | 1219 |
used x86 registers | 9 |
used mmx registers | 0 |
used xmm registers | 0 |
used ymm registers | 9 |
used zmm registers | 32 |
nb stack references | 30 |
ADD-SUB / MUL ratio | 0.50 |
micro-operation queue | 94.50 cycles |
front end | 94.50 cycles |
Port | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 |
---|---|---|---|---|---|---|---|---|
uops | 79.50 | 1.00 | 75.50 | 75.50 | 48.00 | 79.50 | 1.00 | 0.00 |
cycles | 79.50 | 50.00 | 75.50 | 75.50 | 48.00 | 79.50 | 1.00 | 0.00 |
Cycles executing div or sqrt instructions | 32.00 |
Longest recurrence chain latency (RecMII) | 29.00 |
FE+BE cycles | 91.13 |
Stall cycles | 37.63 |
RS full (events) | 0.06 |
PRF_FLOAT full (events) | 44.04 |
Front-end | 94.50 |
Dispatch | 79.50 |
DIV/SQRT | 32.00 |
Data deps. | 29.00 |
Overall L1 | 94.50 |
all | 100% |
load | 100% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 100% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 100% |
all | 99% |
load | 97% |
store | 100% |
mul | 100% |
add-sub | 100% |
fma | 100% |
div/sqrt | 100% |
other | 96% |
all | 99% |
load | 97% |
store | 100% |
mul | 100% |
add-sub | 100% |
fma | 100% |
div/sqrt | 100% |
other | 96% |
all | 68% |
load | 100% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 100% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 50% |
all | 99% |
load | 97% |
store | 100% |
mul | 100% |
add-sub | 100% |
fma | 100% |
div/sqrt | 100% |
other | 96% |
all | 97% |
load | 98% |
store | 100% |
mul | 100% |
add-sub | 100% |
fma | 100% |
div/sqrt | 100% |
other | 86% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|
VMOVAPD (%RSI,%RDX,2),%ZMM11 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VMOVAPD 0x40(%RSI,%RDX,2),%ZMM10 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
KMOVB %K1,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
KMOVB %K1,%K5 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
KMOVB %K1,%K3 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
KMOVB %K1,%K6 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VMOVAPD 0x8c0(%RSP),%ZMM9 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
KMOVB %K1,%K7 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VMULPD %ZMM18,%ZMM11,%ZMM0 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
KMOVB %K1,%K4 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VMOVAPD 0x780(%RSP),%ZMM24 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VMOVDQA32 (%RDI,%RDX,1),%ZMM7 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 0.50 |
VMULPD %ZMM18,%ZMM10,%ZMM1 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
ADD $0x40,%RDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VRNDSCALEPD $0xb,%ZMM0,%ZMM3 | 2 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 8 | 1 |
VCVTTPD2DQ %ZMM0,%YMM8 | 2 | 0.50 | 0 | 0 | 0 | 0 | 1.50 | 0 | 0 | 7 | 1 |
VGATHERDPD (%RAX,%YMM8,8),%ZMM22{%K2} | 4 | 1 | 0 | 4 | 4 | 0 | 1 | 0 | 0 | 21 | 5 |
KMOVB %K1,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VRNDSCALEPD $0xb,%ZMM1,%ZMM12 | 2 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 8 | 1 |
VCVTTPD2DQ %ZMM1,%YMM2 | 2 | 0.50 | 0 | 0 | 0 | 0 | 1.50 | 0 | 0 | 7 | 1 |
VINSERTI64X4 $0x1,%YMM2,%ZMM8,%ZMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VPADDD 0x2b90c(%RIP),%ZMM5,%ZMM14 | 1 | 0.50 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
VPADDD 0x2b942(%RIP),%ZMM5,%ZMM15 | 1 | 0.50 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
VSHUFI32X4 $-0x12,%ZMM5,%ZMM5,%ZMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VPADDD 0x2b971(%RIP),%ZMM5,%ZMM5 | 1 | 0.50 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
VGATHERDPD (%RAX,%YMM13,8),%ZMM21{%K3} | 4 | 1 | 0 | 4 | 4 | 0 | 1 | 0 | 0 | 21 | 5 |
KMOVB %K1,%K3 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VSHUFI32X4 $-0x12,%ZMM14,%ZMM14,%ZMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VGATHERDPD (%RAX,%YMM15,8),%ZMM8{%K6} | 4 | 1 | 0 | 4 | 4 | 0 | 1 | 0 | 0 | 21 | 5 |
VSHUFI32X4 $-0x12,%ZMM15,%ZMM15,%ZMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VMOVAPD 0x900(%RSP),%ZMM15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VGATHERDPD (%RAX,%YMM6,8),%ZMM4{%K5} | 4 | 1 | 0 | 4 | 4 | 0 | 1 | 0 | 0 | 21 | 5 |
VGATHERDPD (%RAX,%YMM5,8),%ZMM20{%K2} | 4 | 1 | 0 | 4 | 4 | 0 | 1 | 0 | 0 | 21 | 5 |
VSHUFI32X4 $-0x12,%ZMM5,%ZMM5,%ZMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VMOVAPD 0x940(%RSP),%ZMM5 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VSUBPD %ZMM3,%ZMM0,%ZMM0 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VGATHERDPD (%RAX,%YMM14,8),%ZMM23{%K4} | 4 | 1 | 0 | 4 | 4 | 0 | 1 | 0 | 0 | 21 | 5 |
VMOVAPD %ZMM9,%ZMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
KMOVB %K1,%K4 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VSUBPD %ZMM12,%ZMM1,%ZMM1 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VGATHERDPD (%RAX,%YMM13,8),%ZMM12{%K7} | 4 | 1 | 0 | 4 | 4 | 0 | 1 | 0 | 0 | 21 | 5 |
VGATHERDPD (%RAX,%YMM6,8),%ZMM13{%K3} | 4 | 1 | 0 | 4 | 4 | 0 | 1 | 0 | 0 | 21 | 5 |
KMOVB %K1,%K5 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VMOVAPD %ZMM5,%ZMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
KMOVB %K1,%K6 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
KMOVB %K1,%K7 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
KMOVB %K1,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VMULPD %ZMM0,%ZMM0,%ZMM3 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %ZMM0,%ZMM15,%ZMM6 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
KMOVB %K1,%K3 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VMULPD %ZMM1,%ZMM1,%ZMM2 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %ZMM1,%ZMM15,%ZMM5 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMOVAPD 0x880(%RSP),%ZMM15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VFMADD132PD %ZMM0,%ZMM15,%ZMM14 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %ZMM1,%ZMM15,%ZMM9 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMOVAPD 0x840(%RSP),%ZMM15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VMULPD %ZMM3,%ZMM0,%ZMM17 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMOVAPD %ZMM15,%ZMM19 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMULPD %ZMM2,%ZMM1,%ZMM16 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMULPD %ZMM9,%ZMM4,%ZMM9 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMULPD %ZMM14,%ZMM23,%ZMM14 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %ZMM21,%ZMM9,%ZMM5 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMOVAPD 0x7c0(%RSP),%ZMM9 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VFMADD132PD %ZMM22,%ZMM14,%ZMM6 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMOVAPD 0x800(%RSP),%ZMM14 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VFMADD132PD %ZMM0,%ZMM14,%ZMM19 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %ZMM1,%ZMM14,%ZMM15 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMOVAPD %ZMM9,%ZMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VFMADD132PD %ZMM0,%ZMM24,%ZMM14 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %ZMM1,%ZMM24,%ZMM9 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMULPD %ZMM14,%ZMM20,%ZMM14 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMULPD %ZMM9,%ZMM13,%ZMM9 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %ZMM19,%ZMM8,%ZMM14 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMOVDQA32 %YMM7,%YMM19 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VFMADD231PD %ZMM15,%ZMM12,%ZMM9 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMOVAPD 0x740(%RSP),%ZMM15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VADDPD %ZMM14,%ZMM6,%ZMM6 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VADDPD %ZMM9,%ZMM5,%ZMM5 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMULPD %ZMM15,%ZMM6,%ZMM14 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VBROADCASTSD 0x2b619(%RIP),%ZMM6 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 0.50 |
VMULPD %ZMM15,%ZMM5,%ZMM9 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VDIVPD %ZMM11,%ZMM6,%ZMM11 | 3 | 2.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 24 | 16 |
VDIVPD %ZMM10,%ZMM6,%ZMM10 | 3 | 2.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 24 | 16 |
VMULPD %ZMM18,%ZMM11,%ZMM11 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMULPD %ZMM18,%ZMM10,%ZMM10 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VSCATTERDPD %ZMM14,(%R11,%YMM7,8){%K4} | 27 | 0 | 0 | 4 | 4 | 8 | 1 | 0 | 0 | 15 | 11 |
VSHUFI32X4 $-0x12,%ZMM7,%ZMM7,%ZMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VSCATTERDPD %ZMM9,(%R11,%YMM7,8){%K5} | 27 | 0 | 0 | 4 | 4 | 8 | 1 | 0 | 0 | 15 | 11 |
VMOVAPD 0x6c0(%RSP),%ZMM5 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VMOVAPD 0x680(%RSP),%ZMM15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VMOVAPD 0x700(%RSP),%ZMM14 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VMOVAPD 0x600(%RSP),%ZMM9 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VMOVAPD %ZMM5,%ZMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VFMADD132PD %ZMM1,%ZMM15,%ZMM5 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %ZMM0,%ZMM15,%ZMM6 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMOVAPD 0x5c0(%RSP),%ZMM15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VFMADD231PD %ZMM14,%ZMM2,%ZMM5 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %ZMM14,%ZMM3,%ZMM6 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMOVAPD %ZMM9,%ZMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VFMADD132PD %ZMM0,%ZMM15,%ZMM14 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %ZMM1,%ZMM15,%ZMM9 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMOVAPD 0x640(%RSP),%ZMM15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VFMADD231PD %ZMM15,%ZMM3,%ZMM14 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %ZMM15,%ZMM2,%ZMM9 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMULPD %ZMM14,%ZMM23,%ZMM14 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMULPD %ZMM9,%ZMM4,%ZMM9 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %ZMM22,%ZMM14,%ZMM6 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMOVAPD 0x540(%RSP),%ZMM14 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VMOVAPD 0x500(%RSP),%ZMM15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VFMADD132PD %ZMM21,%ZMM9,%ZMM5 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMOVAPD 0x580(%RSP),%ZMM9 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VMOVAPD %ZMM14,%ZMM24 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VFMADD132PD %ZMM1,%ZMM15,%ZMM14 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %ZMM0,%ZMM15,%ZMM24 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMOVAPD 0x4c0(%RSP),%ZMM15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VFMADD231PD %ZMM9,%ZMM2,%ZMM14 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %ZMM9,%ZMM3,%ZMM24 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMOVAPD %ZMM15,%ZMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VFMADD213PD 0x980(%RSP),%ZMM0,%ZMM9 | 1 | 0.50 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VFMADD213PD 0x980(%RSP),%ZMM1,%ZMM15 | 1 | 0.50 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VFMADD231PD 0x9c0(%RSP),%ZMM3,%ZMM9 | 1 | 0.50 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VFMADD231PD 0x9c0(%RSP),%ZMM2,%ZMM15 | 1 | 0.50 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMULPD %ZMM9,%ZMM20,%ZMM9 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMULPD %ZMM15,%ZMM13,%ZMM15 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %ZMM24,%ZMM8,%ZMM9 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %ZMM12,%ZMM15,%ZMM14 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VADDPD %ZMM9,%ZMM6,%ZMM6 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VADDPD %ZMM14,%ZMM5,%ZMM5 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMULPD %ZMM11,%ZMM6,%ZMM11 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMULPD %ZMM10,%ZMM5,%ZMM10 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VSCATTERDPD %ZMM11,(%R10,%YMM19,8){%K6} | 27 | 0 | 0 | 4 | 4 | 8 | 1 | 0 | 0 | 15 | 11 |
VSCATTERDPD %ZMM10,(%R10,%YMM7,8){%K7} | 27 | 0 | 0 | 4 | 4 | 8 | 1 | 0 | 0 | 15 | 11 |
VMOVAPD 0x440(%RSP),%ZMM14 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VMOVAPD 0x400(%RSP),%ZMM6 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VMOVAPD 0x480(%RSP),%ZMM15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VMOVAPD 0x3c0(%RSP),%ZMM11 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VMULPD %ZMM14,%ZMM3,%ZMM9 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMOVAPD %ZMM6,%ZMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMULPD %ZMM14,%ZMM2,%ZMM5 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %ZMM0,%ZMM11,%ZMM10 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMOVAPD 0x340(%RSP),%ZMM14 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VFMADD132PD %ZMM1,%ZMM11,%ZMM6 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMULPD %ZMM14,%ZMM3,%ZMM11 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMULPD %ZMM14,%ZMM2,%ZMM14 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %ZMM15,%ZMM17,%ZMM9 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %ZMM15,%ZMM16,%ZMM5 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMOVAPD 0x280(%RSP),%ZMM15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VADDPD %ZMM9,%ZMM10,%ZMM10 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMOVAPD 0x380(%RSP),%ZMM9 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VADDPD %ZMM5,%ZMM6,%ZMM6 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %ZMM9,%ZMM17,%ZMM11 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %ZMM9,%ZMM16,%ZMM14 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMOVAPD 0x300(%RSP),%ZMM9 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VMOVAPD %ZMM9,%ZMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VFMADD132PD %ZMM1,%ZMM15,%ZMM9 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %ZMM0,%ZMM15,%ZMM5 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VADDPD %ZMM14,%ZMM9,%ZMM14 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMOVAPD %ZMM0,%ZMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VFMADD132PD %ZMM26,%ZMM25,%ZMM0 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %ZMM30,%ZMM29,%ZMM9 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VADDPD %ZMM11,%ZMM5,%ZMM11 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMULPD %ZMM14,%ZMM4,%ZMM4 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMOVAPD %ZMM1,%ZMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMULPD %ZMM23,%ZMM11,%ZMM5 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %ZMM26,%ZMM25,%ZMM1 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %ZMM30,%ZMM29,%ZMM14 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %ZMM6,%ZMM21,%ZMM4 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMOVAPD 0x2c0(%RSP),%ZMM6 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VFMADD231PD %ZMM10,%ZMM22,%ZMM5 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMULPD %ZMM6,%ZMM2,%ZMM15 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMULPD %ZMM6,%ZMM3,%ZMM10 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMULPD %ZMM27,%ZMM2,%ZMM2 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMULPD %ZMM27,%ZMM3,%ZMM3 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %ZMM31,%ZMM16,%ZMM15 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %ZMM31,%ZMM17,%ZMM10 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %ZMM28,%ZMM16,%ZMM2 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %ZMM28,%ZMM17,%ZMM3 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VADDPD %ZMM15,%ZMM14,%ZMM6 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VADDPD %ZMM10,%ZMM9,%ZMM11 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VADDPD %ZMM1,%ZMM2,%ZMM1 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VADDPD %ZMM3,%ZMM0,%ZMM0 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMULPD %ZMM1,%ZMM13,%ZMM13 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMULPD %ZMM20,%ZMM0,%ZMM10 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %ZMM6,%ZMM13,%ZMM12 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %ZMM11,%ZMM10,%ZMM8 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VADDPD %ZMM12,%ZMM4,%ZMM12 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VADDPD %ZMM5,%ZMM8,%ZMM8 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VSCATTERDPD %ZMM8,(%R9,%YMM19,8){%K2} | 27 | 0 | 0 | 4 | 4 | 8 | 1 | 0 | 0 | 15 | 11 |
VSCATTERDPD %ZMM12,(%R9,%YMM7,8){%K3} | 27 | 0 | 0 | 4 | 4 | 8 | 1 | 0 | 0 | 15 | 11 |
CMP %R13,%RDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JNE 3bc30 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
Function | miniqmcreference::TwoBodyJastrowRef |
Source file and lines | BsplineFunctor.h:305-336 |
Module | libqmcwfs.so |
nb instructions | 191 |
nb uops | 378 |
loop length | 1219 |
used x86 registers | 9 |
used mmx registers | 0 |
used xmm registers | 0 |
used ymm registers | 9 |
used zmm registers | 32 |
nb stack references | 30 |
ADD-SUB / MUL ratio | 0.50 |
micro-operation queue | 94.50 cycles |
front end | 94.50 cycles |
Port | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 |
---|---|---|---|---|---|---|---|---|
uops | 79.50 | 1.00 | 75.50 | 75.50 | 48.00 | 79.50 | 1.00 | 0.00 |
cycles | 79.50 | 50.00 | 75.50 | 75.50 | 48.00 | 79.50 | 1.00 | 0.00 |
Cycles executing div or sqrt instructions | 32.00 |
Longest recurrence chain latency (RecMII) | 29.00 |
FE+BE cycles | 91.13 |
Stall cycles | 37.63 |
RS full (events) | 0.06 |
PRF_FLOAT full (events) | 44.04 |
Front-end | 94.50 |
Dispatch | 79.50 |
DIV/SQRT | 32.00 |
Data deps. | 29.00 |
Overall L1 | 94.50 |
all | 100% |
load | 100% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 100% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 100% |
all | 99% |
load | 97% |
store | 100% |
mul | 100% |
add-sub | 100% |
fma | 100% |
div/sqrt | 100% |
other | 96% |
all | 99% |
load | 97% |
store | 100% |
mul | 100% |
add-sub | 100% |
fma | 100% |
div/sqrt | 100% |
other | 96% |
all | 68% |
load | 100% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 100% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 50% |
all | 99% |
load | 97% |
store | 100% |
mul | 100% |
add-sub | 100% |
fma | 100% |
div/sqrt | 100% |
other | 96% |
all | 97% |
load | 98% |
store | 100% |
mul | 100% |
add-sub | 100% |
fma | 100% |
div/sqrt | 100% |
other | 86% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|
VMOVAPD (%RSI,%RDX,2),%ZMM11 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VMOVAPD 0x40(%RSI,%RDX,2),%ZMM10 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
KMOVB %K1,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
KMOVB %K1,%K5 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
KMOVB %K1,%K3 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
KMOVB %K1,%K6 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VMOVAPD 0x8c0(%RSP),%ZMM9 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
KMOVB %K1,%K7 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VMULPD %ZMM18,%ZMM11,%ZMM0 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
KMOVB %K1,%K4 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VMOVAPD 0x780(%RSP),%ZMM24 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VMOVDQA32 (%RDI,%RDX,1),%ZMM7 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 0.50 |
VMULPD %ZMM18,%ZMM10,%ZMM1 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
ADD $0x40,%RDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VRNDSCALEPD $0xb,%ZMM0,%ZMM3 | 2 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 8 | 1 |
VCVTTPD2DQ %ZMM0,%YMM8 | 2 | 0.50 | 0 | 0 | 0 | 0 | 1.50 | 0 | 0 | 7 | 1 |
VGATHERDPD (%RAX,%YMM8,8),%ZMM22{%K2} | 4 | 1 | 0 | 4 | 4 | 0 | 1 | 0 | 0 | 21 | 5 |
KMOVB %K1,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VRNDSCALEPD $0xb,%ZMM1,%ZMM12 | 2 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 8 | 1 |
VCVTTPD2DQ %ZMM1,%YMM2 | 2 | 0.50 | 0 | 0 | 0 | 0 | 1.50 | 0 | 0 | 7 | 1 |
VINSERTI64X4 $0x1,%YMM2,%ZMM8,%ZMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VPADDD 0x2b90c(%RIP),%ZMM5,%ZMM14 | 1 | 0.50 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
VPADDD 0x2b942(%RIP),%ZMM5,%ZMM15 | 1 | 0.50 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
VSHUFI32X4 $-0x12,%ZMM5,%ZMM5,%ZMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VPADDD 0x2b971(%RIP),%ZMM5,%ZMM5 | 1 | 0.50 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
VGATHERDPD (%RAX,%YMM13,8),%ZMM21{%K3} | 4 | 1 | 0 | 4 | 4 | 0 | 1 | 0 | 0 | 21 | 5 |
KMOVB %K1,%K3 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VSHUFI32X4 $-0x12,%ZMM14,%ZMM14,%ZMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VGATHERDPD (%RAX,%YMM15,8),%ZMM8{%K6} | 4 | 1 | 0 | 4 | 4 | 0 | 1 | 0 | 0 | 21 | 5 |
VSHUFI32X4 $-0x12,%ZMM15,%ZMM15,%ZMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VMOVAPD 0x900(%RSP),%ZMM15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VGATHERDPD (%RAX,%YMM6,8),%ZMM4{%K5} | 4 | 1 | 0 | 4 | 4 | 0 | 1 | 0 | 0 | 21 | 5 |
VGATHERDPD (%RAX,%YMM5,8),%ZMM20{%K2} | 4 | 1 | 0 | 4 | 4 | 0 | 1 | 0 | 0 | 21 | 5 |
VSHUFI32X4 $-0x12,%ZMM5,%ZMM5,%ZMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VMOVAPD 0x940(%RSP),%ZMM5 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VSUBPD %ZMM3,%ZMM0,%ZMM0 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VGATHERDPD (%RAX,%YMM14,8),%ZMM23{%K4} | 4 | 1 | 0 | 4 | 4 | 0 | 1 | 0 | 0 | 21 | 5 |
VMOVAPD %ZMM9,%ZMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
KMOVB %K1,%K4 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VSUBPD %ZMM12,%ZMM1,%ZMM1 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VGATHERDPD (%RAX,%YMM13,8),%ZMM12{%K7} | 4 | 1 | 0 | 4 | 4 | 0 | 1 | 0 | 0 | 21 | 5 |
VGATHERDPD (%RAX,%YMM6,8),%ZMM13{%K3} | 4 | 1 | 0 | 4 | 4 | 0 | 1 | 0 | 0 | 21 | 5 |
KMOVB %K1,%K5 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VMOVAPD %ZMM5,%ZMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
KMOVB %K1,%K6 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
KMOVB %K1,%K7 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
KMOVB %K1,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VMULPD %ZMM0,%ZMM0,%ZMM3 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %ZMM0,%ZMM15,%ZMM6 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
KMOVB %K1,%K3 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VMULPD %ZMM1,%ZMM1,%ZMM2 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %ZMM1,%ZMM15,%ZMM5 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMOVAPD 0x880(%RSP),%ZMM15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VFMADD132PD %ZMM0,%ZMM15,%ZMM14 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %ZMM1,%ZMM15,%ZMM9 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMOVAPD 0x840(%RSP),%ZMM15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VMULPD %ZMM3,%ZMM0,%ZMM17 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMOVAPD %ZMM15,%ZMM19 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMULPD %ZMM2,%ZMM1,%ZMM16 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMULPD %ZMM9,%ZMM4,%ZMM9 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMULPD %ZMM14,%ZMM23,%ZMM14 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %ZMM21,%ZMM9,%ZMM5 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMOVAPD 0x7c0(%RSP),%ZMM9 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VFMADD132PD %ZMM22,%ZMM14,%ZMM6 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMOVAPD 0x800(%RSP),%ZMM14 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VFMADD132PD %ZMM0,%ZMM14,%ZMM19 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %ZMM1,%ZMM14,%ZMM15 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMOVAPD %ZMM9,%ZMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VFMADD132PD %ZMM0,%ZMM24,%ZMM14 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %ZMM1,%ZMM24,%ZMM9 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMULPD %ZMM14,%ZMM20,%ZMM14 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMULPD %ZMM9,%ZMM13,%ZMM9 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %ZMM19,%ZMM8,%ZMM14 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMOVDQA32 %YMM7,%YMM19 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VFMADD231PD %ZMM15,%ZMM12,%ZMM9 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMOVAPD 0x740(%RSP),%ZMM15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VADDPD %ZMM14,%ZMM6,%ZMM6 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VADDPD %ZMM9,%ZMM5,%ZMM5 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMULPD %ZMM15,%ZMM6,%ZMM14 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VBROADCASTSD 0x2b619(%RIP),%ZMM6 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 0.50 |
VMULPD %ZMM15,%ZMM5,%ZMM9 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VDIVPD %ZMM11,%ZMM6,%ZMM11 | 3 | 2.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 24 | 16 |
VDIVPD %ZMM10,%ZMM6,%ZMM10 | 3 | 2.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 24 | 16 |
VMULPD %ZMM18,%ZMM11,%ZMM11 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMULPD %ZMM18,%ZMM10,%ZMM10 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VSCATTERDPD %ZMM14,(%R11,%YMM7,8){%K4} | 27 | 0 | 0 | 4 | 4 | 8 | 1 | 0 | 0 | 15 | 11 |
VSHUFI32X4 $-0x12,%ZMM7,%ZMM7,%ZMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VSCATTERDPD %ZMM9,(%R11,%YMM7,8){%K5} | 27 | 0 | 0 | 4 | 4 | 8 | 1 | 0 | 0 | 15 | 11 |
VMOVAPD 0x6c0(%RSP),%ZMM5 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VMOVAPD 0x680(%RSP),%ZMM15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VMOVAPD 0x700(%RSP),%ZMM14 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VMOVAPD 0x600(%RSP),%ZMM9 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VMOVAPD %ZMM5,%ZMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VFMADD132PD %ZMM1,%ZMM15,%ZMM5 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %ZMM0,%ZMM15,%ZMM6 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMOVAPD 0x5c0(%RSP),%ZMM15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VFMADD231PD %ZMM14,%ZMM2,%ZMM5 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %ZMM14,%ZMM3,%ZMM6 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMOVAPD %ZMM9,%ZMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VFMADD132PD %ZMM0,%ZMM15,%ZMM14 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %ZMM1,%ZMM15,%ZMM9 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMOVAPD 0x640(%RSP),%ZMM15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VFMADD231PD %ZMM15,%ZMM3,%ZMM14 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %ZMM15,%ZMM2,%ZMM9 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMULPD %ZMM14,%ZMM23,%ZMM14 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMULPD %ZMM9,%ZMM4,%ZMM9 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %ZMM22,%ZMM14,%ZMM6 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMOVAPD 0x540(%RSP),%ZMM14 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VMOVAPD 0x500(%RSP),%ZMM15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VFMADD132PD %ZMM21,%ZMM9,%ZMM5 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMOVAPD 0x580(%RSP),%ZMM9 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VMOVAPD %ZMM14,%ZMM24 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VFMADD132PD %ZMM1,%ZMM15,%ZMM14 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %ZMM0,%ZMM15,%ZMM24 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMOVAPD 0x4c0(%RSP),%ZMM15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VFMADD231PD %ZMM9,%ZMM2,%ZMM14 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %ZMM9,%ZMM3,%ZMM24 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMOVAPD %ZMM15,%ZMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VFMADD213PD 0x980(%RSP),%ZMM0,%ZMM9 | 1 | 0.50 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VFMADD213PD 0x980(%RSP),%ZMM1,%ZMM15 | 1 | 0.50 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VFMADD231PD 0x9c0(%RSP),%ZMM3,%ZMM9 | 1 | 0.50 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VFMADD231PD 0x9c0(%RSP),%ZMM2,%ZMM15 | 1 | 0.50 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMULPD %ZMM9,%ZMM20,%ZMM9 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMULPD %ZMM15,%ZMM13,%ZMM15 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %ZMM24,%ZMM8,%ZMM9 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %ZMM12,%ZMM15,%ZMM14 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VADDPD %ZMM9,%ZMM6,%ZMM6 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VADDPD %ZMM14,%ZMM5,%ZMM5 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMULPD %ZMM11,%ZMM6,%ZMM11 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMULPD %ZMM10,%ZMM5,%ZMM10 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VSCATTERDPD %ZMM11,(%R10,%YMM19,8){%K6} | 27 | 0 | 0 | 4 | 4 | 8 | 1 | 0 | 0 | 15 | 11 |
VSCATTERDPD %ZMM10,(%R10,%YMM7,8){%K7} | 27 | 0 | 0 | 4 | 4 | 8 | 1 | 0 | 0 | 15 | 11 |
VMOVAPD 0x440(%RSP),%ZMM14 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VMOVAPD 0x400(%RSP),%ZMM6 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VMOVAPD 0x480(%RSP),%ZMM15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VMOVAPD 0x3c0(%RSP),%ZMM11 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VMULPD %ZMM14,%ZMM3,%ZMM9 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMOVAPD %ZMM6,%ZMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMULPD %ZMM14,%ZMM2,%ZMM5 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %ZMM0,%ZMM11,%ZMM10 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMOVAPD 0x340(%RSP),%ZMM14 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VFMADD132PD %ZMM1,%ZMM11,%ZMM6 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMULPD %ZMM14,%ZMM3,%ZMM11 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMULPD %ZMM14,%ZMM2,%ZMM14 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %ZMM15,%ZMM17,%ZMM9 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %ZMM15,%ZMM16,%ZMM5 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMOVAPD 0x280(%RSP),%ZMM15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VADDPD %ZMM9,%ZMM10,%ZMM10 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMOVAPD 0x380(%RSP),%ZMM9 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VADDPD %ZMM5,%ZMM6,%ZMM6 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %ZMM9,%ZMM17,%ZMM11 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %ZMM9,%ZMM16,%ZMM14 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMOVAPD 0x300(%RSP),%ZMM9 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VMOVAPD %ZMM9,%ZMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VFMADD132PD %ZMM1,%ZMM15,%ZMM9 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %ZMM0,%ZMM15,%ZMM5 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VADDPD %ZMM14,%ZMM9,%ZMM14 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMOVAPD %ZMM0,%ZMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VFMADD132PD %ZMM26,%ZMM25,%ZMM0 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %ZMM30,%ZMM29,%ZMM9 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VADDPD %ZMM11,%ZMM5,%ZMM11 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMULPD %ZMM14,%ZMM4,%ZMM4 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMOVAPD %ZMM1,%ZMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMULPD %ZMM23,%ZMM11,%ZMM5 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %ZMM26,%ZMM25,%ZMM1 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %ZMM30,%ZMM29,%ZMM14 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %ZMM6,%ZMM21,%ZMM4 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMOVAPD 0x2c0(%RSP),%ZMM6 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VFMADD231PD %ZMM10,%ZMM22,%ZMM5 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMULPD %ZMM6,%ZMM2,%ZMM15 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMULPD %ZMM6,%ZMM3,%ZMM10 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMULPD %ZMM27,%ZMM2,%ZMM2 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMULPD %ZMM27,%ZMM3,%ZMM3 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %ZMM31,%ZMM16,%ZMM15 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %ZMM31,%ZMM17,%ZMM10 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %ZMM28,%ZMM16,%ZMM2 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %ZMM28,%ZMM17,%ZMM3 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VADDPD %ZMM15,%ZMM14,%ZMM6 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VADDPD %ZMM10,%ZMM9,%ZMM11 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VADDPD %ZMM1,%ZMM2,%ZMM1 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VADDPD %ZMM3,%ZMM0,%ZMM0 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMULPD %ZMM1,%ZMM13,%ZMM13 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMULPD %ZMM20,%ZMM0,%ZMM10 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %ZMM6,%ZMM13,%ZMM12 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VFMADD132PD %ZMM11,%ZMM10,%ZMM8 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VADDPD %ZMM12,%ZMM4,%ZMM12 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VADDPD %ZMM5,%ZMM8,%ZMM8 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VSCATTERDPD %ZMM8,(%R9,%YMM19,8){%K2} | 27 | 0 | 0 | 4 | 4 | 8 | 1 | 0 | 0 | 15 | 11 |
VSCATTERDPD %ZMM12,(%R9,%YMM7,8){%K3} | 27 | 0 | 0 | 4 | 4 | 8 | 1 | 0 | 0 | 15 | 11 |
CMP %R13,%RDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JNE 3bc30 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |