Loop Id: 1167 | Module: exec | Source: ParticleBConds.h:185-217 | Coverage: 18.02% |
---|
Loop Id: 1167 | Module: exec | Source: ParticleBConds.h:185-217 | Coverage: 18.02% |
---|
0x45e420 LEA (%R9,%RDX,1),%EAX |
0x45e424 VPBROADCASTD %EAX,%XMM3 |
0x45e42a VPADDD 0x3df4e(%RIP),%XMM3,%XMM3 [6] |
0x45e432 CLTQ |
0x45e434 VPCMPGTD %XMM3,%XMM21,%K1 |
0x45e43a VMOVUPD (%R13,%RAX,8),%YMM3 [10] |
0x45e441 VSUBPD 0x300(%RSP),%YMM3,%YMM3 [9] |
0x45e44a VMOVUPD (%R12,%RAX,8),%YMM4 [4] |
0x45e450 VBROADCASTSD 0x37bd7(%RIP),%YMM14 [6] |
0x45e459 VBROADCASTSD 0x37bc5(%RIP),%YMM20 [6] |
0x45e463 VBLENDMPD %YMM20,%YMM14,%YMM25{%K1} |
0x45e469 VMULPD %YMM25,%YMM3,%YMM28 |
0x45e46f VSUBPD 0x340(%RSP),%YMM4,%YMM3 [9] |
0x45e478 VMOVUPD (%RSI,%RAX,8),%YMM4 [1] |
0x45e47d VSUBPD 0x360(%RSP),%YMM4,%YMM4 [9] |
0x45e486 VMULPD %YMM25,%YMM3,%YMM26 |
0x45e48c VMULPD %YMM25,%YMM4,%YMM3 |
0x45e492 VMULPD 0x320(%RSP),%YMM28,%YMM4 [9] |
0x45e49a VFMADD231PD 0x380(%RSP),%YMM26,%YMM4 [9] |
0x45e4a2 VFMADD231PD 0x1a0(%RSP),%YMM3,%YMM4 [9] |
0x45e4ac VRNDSCALEPD $0x9,%YMM4,%YMM27 |
0x45e4b3 VMULPD 0x2c0(%RSP),%YMM28,%YMM4 [9] |
0x45e4bb VFMADD231PD 0x2e0(%RSP),%YMM26,%YMM4 [9] |
0x45e4c3 VFMADD231PD 0x1c0(%RSP),%YMM3,%YMM4 [9] |
0x45e4cd VROUNDPD $0x9,%YMM4,%YMM4 |
0x45e4d3 VMULPD 0x1e0(%RSP),%YMM28,%YMM29 [9] |
0x45e4db VFMADD231PD 0x200(%RSP),%YMM26,%YMM29 [9] |
0x45e4e3 VFMADD231PD 0x3c0(%RSP),%YMM3,%YMM29 [9] |
0x45e4eb VRNDSCALEPD $0x9,%YMM29,%YMM29 |
0x45e4f2 VFMSUB231PD %YMM2,%YMM27,%YMM28 |
0x45e4f8 VFMADD231PD 0x180(%RSP),%YMM4,%YMM28 [9] |
0x45e500 VFNMSUB231PD %YMM29,%YMM7,%YMM28 |
0x45e506 VFMSUB231PD %YMM1,%YMM27,%YMM26 |
0x45e50c VFMADD231PD 0x160(%RSP),%YMM4,%YMM26 [9] |
0x45e514 VFMSUB213PD %YMM3,%YMM0,%YMM27 |
0x45e51a VFNMSUB231PD %YMM24,%YMM29,%YMM26 |
0x45e520 VFMADD231PD 0x3a0(%RSP),%YMM4,%YMM27 [9] |
0x45e528 VFNMSUB231PD 0x3e0(%RSP),%YMM29,%YMM27 [9] |
0x45e530 VMULPD %YMM28,%YMM28,%YMM3 |
0x45e536 VFMADD231PD %YMM26,%YMM26,%YMM3 |
0x45e53c VADDPD 0x420(%RSP),%YMM28,%YMM4 [9] |
0x45e544 VFMADD231PD %YMM27,%YMM27,%YMM3 |
0x45e54a VADDPD 0x400(%RSP),%YMM26,%YMM29 [9] |
0x45e552 VMOVAPD %YMM7,%YMM20 |
0x45e558 VMOVAPD %YMM31,%YMM7 |
0x45e55e VADDPD 0x460(%RSP),%YMM27,%YMM31 [9] |
0x45e566 VMULPD %YMM4,%YMM4,%YMM4 |
0x45e56a VFMADD231PD %YMM29,%YMM29,%YMM4 |
0x45e570 VADDPD %YMM28,%YMM5,%YMM29 |
0x45e576 VFMADD231PD %YMM31,%YMM31,%YMM4 |
0x45e57c VADDPD %YMM26,%YMM6,%YMM31 |
0x45e582 VMOVAPD %YMM2,%YMM14 |
0x45e586 VMOVAPD %YMM1,%YMM2 |
0x45e58a VMOVAPD %YMM0,%YMM1 |
0x45e58e VMOVAPD %YMM5,%YMM0 |
0x45e592 VADDPD 0x440(%RSP),%YMM27,%YMM5 [9] |
0x45e59a VMULPD %YMM29,%YMM29,%YMM29 |
0x45e5a0 VFMADD231PD %YMM31,%YMM31,%YMM29 |
0x45e5a6 VFMADD231PD %YMM5,%YMM5,%YMM29 |
0x45e5ac VMINPD %YMM3,%YMM4,%YMM5 |
0x45e5b0 VCMPPD $0x1,%YMM5,%YMM29,%K1 |
0x45e5b7 VMINPD %YMM5,%YMM29,%YMM5 |
0x45e5bd VADDPD 0x120(%RSP),%YMM28,%YMM29 [9] |
0x45e5c5 VADDPD %YMM26,%YMM23,%YMM31 |
0x45e5cb VMULPD %YMM29,%YMM29,%YMM29 |
0x45e5d1 VMOVAPD %YMM6,%YMM22 |
0x45e5d7 VADDPD %YMM27,%YMM19,%YMM6 |
0x45e5dd VFMADD231PD %YMM31,%YMM31,%YMM29 |
0x45e5e3 VFMADD231PD %YMM6,%YMM6,%YMM29 |
0x45e5e9 VCMPPD $0x1,%YMM5,%YMM29,%K2 |
0x45e5f0 VADDPD %YMM28,%YMM17,%YMM6 |
0x45e5f6 VADDPD %YMM26,%YMM18,%YMM31 |
0x45e5fc VMINPD %YMM5,%YMM29,%YMM5 |
0x45e602 VADDPD %YMM27,%YMM16,%YMM29 |
0x45e608 VMULPD %YMM6,%YMM6,%YMM6 |
0x45e60c VFMADD231PD %YMM31,%YMM31,%YMM6 |
0x45e612 VFMADD231PD %YMM29,%YMM29,%YMM6 |
0x45e618 VADDPD %YMM28,%YMM13,%YMM29 |
0x45e61e VCMPPD $0x1,%YMM5,%YMM6,%K3 |
0x45e625 VADDPD %YMM26,%YMM15,%YMM31 |
0x45e62b VMULPD %YMM29,%YMM29,%YMM29 |
0x45e631 VFMADD231PD %YMM31,%YMM31,%YMM29 |
0x45e637 VADDPD %YMM27,%YMM30,%YMM31 |
0x45e63d VFMADD231PD %YMM31,%YMM31,%YMM29 |
0x45e643 VMINPD %YMM5,%YMM6,%YMM5 |
0x45e647 VADDPD %YMM28,%YMM11,%YMM6 |
0x45e64d VADDPD %YMM26,%YMM12,%YMM31 |
0x45e653 VMULPD %YMM6,%YMM6,%YMM6 |
0x45e657 VFMADD231PD %YMM31,%YMM31,%YMM6 |
0x45e65d VMOVAPD %YMM7,%YMM31 |
0x45e663 VMOVAPD %YMM20,%YMM7 |
0x45e669 VCMPPD $0x1,%YMM5,%YMM29,%K4 |
0x45e670 VMINPD %YMM5,%YMM29,%YMM5 |
0x45e676 VADDPD %YMM27,%YMM10,%YMM29 |
0x45e67c VFMADD231PD %YMM29,%YMM29,%YMM6 |
0x45e682 VCMPPD $0x1,%YMM5,%YMM6,%K5 |
0x45e689 VMINPD %YMM5,%YMM6,%YMM5 |
0x45e68d VADDPD %YMM28,%YMM8,%YMM6 |
0x45e693 VADDPD %YMM26,%YMM9,%YMM29 |
0x45e699 VMULPD %YMM6,%YMM6,%YMM6 |
0x45e69d VFMADD231PD %YMM29,%YMM29,%YMM6 |
0x45e6a3 VADDPD %YMM27,%YMM31,%YMM29 |
0x45e6a9 VFMADD231PD %YMM29,%YMM29,%YMM6 |
0x45e6af VCMPPD $0x1,%YMM5,%YMM6,%K6 |
0x45e6b6 VMINPD %YMM5,%YMM6,%YMM5 |
0x45e6ba VMOVAPD %YMM22,%YMM6 |
0x45e6c0 VCMPPD $0x1,%YMM3,%YMM4,%YMM3 |
0x45e6c5 VSQRTPD %YMM5,%YMM4 |
0x45e6c9 VMOVAPD %YMM0,%YMM5 |
0x45e6cd VMOVAPD %YMM1,%YMM0 |
0x45e6d1 VMOVAPD %YMM2,%YMM1 |
0x45e6d5 VMOVAPD %YMM14,%YMM2 |
0x45e6d9 VANDPD 0x37965(%RIP){1to4},%YMM3,%YMM3 [6] |
0x45e6e3 VPBROADCASTQ 0x3dc63(%RIP),%YMM3{%K1} [6] |
0x45e6ed VPBROADCASTQ 0x3dc61(%RIP),%YMM3{%K2} [6] |
0x45e6f7 VPBROADCASTQ 0x3dc5f(%RIP),%YMM3{%K3} [6] |
0x45e701 VPBROADCASTQ 0x3dc5d(%RIP),%YMM3{%K4} [6] |
0x45e70b VPBROADCASTQ 0x3dc5b(%RIP),%YMM3{%K5} [6] |
0x45e715 VPBROADCASTQ 0x3dc59(%RIP),%YMM3{%K6} [6] |
0x45e71f KXNORW %K0,%K0,%K1 |
0x45e723 VMOVUPD %YMM4,(%RCX,%RAX,8) [5] |
0x45e728 VXORPD %XMM4,%XMM4,%XMM4 |
0x45e72c VGATHERQPD 0x90(%RDI,%YMM3,8),%YMM4{%K1} [8] |
0x45e734 VADDPD %YMM28,%YMM4,%YMM4 |
0x45e73a VMULPD %YMM25,%YMM4,%YMM4 |
0x45e740 KXNORW %K0,%K0,%K1 |
0x45e744 VMOVUPD %YMM4,(%R10,%RAX,8) [2] |
0x45e74a VXORPD %XMM4,%XMM4,%XMM4 |
0x45e74e VGATHERQPD 0xd0(%RDI,%YMM3,8),%YMM4{%K1} [8] |
0x45e756 VADDPD %YMM26,%YMM4,%YMM4 |
0x45e75c VMULPD %YMM25,%YMM4,%YMM4 |
0x45e762 KXNORW %K0,%K0,%K1 |
0x45e766 VMOVUPD %YMM4,(%R8,%RAX,8) [7] |
0x45e76c VXORPD %XMM4,%XMM4,%XMM4 |
0x45e770 VGATHERQPD 0x110(%RDI,%YMM3,8),%YMM4{%K1} [8] |
0x45e778 VADDPD %YMM27,%YMM4,%YMM3 |
0x45e77e VMULPD %YMM25,%YMM3,%YMM3 |
0x45e784 VMOVUPD %YMM3,(%R11,%RAX,8) [3] |
0x45e78a ADD $0x4,%EDX |
0x45e78d CMP %EBX,%EDX |
0x45e78f JBE 45e420 |
/home/kcamus/qaas_runs/169-451-1869/intel/miniqmc/build/miniqmc/src/Particle/Lattice/ParticleBConds.h: 185 - 217 |
-------------------------------------------------------------------------------- |
185: #pragma omp simd aligned(temp_r, px, py, pz, dx, dy, dz: QMC_SIMD_ALIGNMENT) |
186: for (int iat = first; iat < last; ++iat) |
187: { |
188: const T flip = iat < flip_ind ? one : minusone; |
189: const T displ_0 = (px[iat] - x0) * flip; |
190: const T displ_1 = (py[iat] - y0) * flip; |
191: const T displ_2 = (pz[iat] - z0) * flip; |
192: |
193: const T ar_0 = -std::floor(displ_0 * g00 + displ_1 * g10 + displ_2 * g20); |
194: const T ar_1 = -std::floor(displ_0 * g01 + displ_1 * g11 + displ_2 * g21); |
195: const T ar_2 = -std::floor(displ_0 * g02 + displ_1 * g12 + displ_2 * g22); |
196: |
197: const T delx = displ_0 + ar_0 * r00 + ar_1 * r10 + ar_2 * r20; |
198: const T dely = displ_1 + ar_0 * r01 + ar_1 * r11 + ar_2 * r21; |
199: const T delz = displ_2 + ar_0 * r02 + ar_1 * r12 + ar_2 * r22; |
200: |
201: T rmin = delx * delx + dely * dely + delz * delz; |
202: int ic = 0; |
203: #pragma unroll(7) |
204: for (int c = 1; c < 8; ++c) |
205: { |
206: const T x = delx + cellx[c]; |
207: const T y = dely + celly[c]; |
208: const T z = delz + cellz[c]; |
209: const T r2 = x * x + y * y + z * z; |
210: ic = (r2 < rmin) ? c : ic; |
211: rmin = (r2 < rmin) ? r2 : rmin; |
212: } |
213: |
214: temp_r[iat] = std::sqrt(rmin); |
215: dx[iat] = flip * (delx + cellx[ic]); |
216: dy[iat] = flip * (dely + celly[ic]); |
217: dz[iat] = flip * (delz + cellz[ic]); |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►59.98+ | qmcplusplus::DistanceTableAA<d[...] | DistanceTableAA.h:83 | exec |
○ | qmcplusplus::ParticleSet::make[...] | ParticleSet.cpp:290 | exec |
○ | main.extracted.104 | miniqmc.cpp:482 | exec |
○ | __kmp_invoke_microtask | libiomp5.so | |
○ | __kmp_fork_call | libiomp5.so | |
○ | __kmpc_fork_call | libiomp5.so | |
○ | main | miniqmc.cpp:404 | exec |
○ | __libc_init_first | libc.so.6 | |
►15.30+ | qmcplusplus::DistanceTableAA<d[...] | DistanceTableAA.h:76 | exec |
○ | qmcplusplus::ParticleSet::setA[...] | ParticleSet.cpp:259 | exec |
○ | main.extracted.104 | refwrap.h:347 | exec |
○ | __kmp_invoke_microtask | libiomp5.so | |
○ | __kmp_fork_call | libiomp5.so | |
○ | __kmpc_fork_call | libiomp5.so | |
○ | main | miniqmc.cpp:404 | exec |
○ | __libc_init_first | libc.so.6 | |
►14.56+ | qmcplusplus::DistanceTableAA<d[...] | DistanceTableAA.h:83 | exec |
○ | qmcplusplus::ParticleSet::make[...] | ParticleSet.cpp:290 | exec |
○ | main.extracted.104 | refwrap.h:347 | exec |
○ | __kmp_invoke_microtask | libiomp5.so | |
○ | __kmp_fork_call | libiomp5.so | |
○ | __kmpc_fork_call | libiomp5.so | |
○ | main | miniqmc.cpp:404 | exec |
○ | __libc_init_first | libc.so.6 | |
►4.94+ | qmcplusplus::DistanceTableBA<d[...] | DistanceTableBA.h:119 | exec |
○ | qmcplusplus::ParticleSet::make[...] | ParticleSet.cpp:290 | exec |
○ | main.extracted.104 | miniqmc.cpp:482 | exec |
○ | __kmp_invoke_microtask | libiomp5.so | |
○ | __kmp_fork_call | libiomp5.so | |
○ | __kmpc_fork_call | libiomp5.so | |
○ | main | miniqmc.cpp:404 | exec |
○ | __libc_init_first | libc.so.6 | |
►1.16+ | qmcplusplus::DistanceTableBA<d[...] | DistanceTableBA.h:119 | exec |
○ | qmcplusplus::ParticleSet::make[...] | ParticleSet.cpp:290 | exec |
○ | main.extracted.104 | refwrap.h:347 | exec |
○ | __kmp_invoke_microtask | libiomp5.so | |
○ | __kmp_fork_call | libiomp5.so | |
○ | __kmpc_fork_call | libiomp5.so | |
○ | main | miniqmc.cpp:404 | exec |
○ | __libc_init_first | libc.so.6 | |
►1.08+ | qmcplusplus::DistanceTableBA<d[...] | DistanceTableBA.h:113 | exec |
○ | qmcplusplus::ParticleSet::setA[...] | ParticleSet.cpp:259 | exec |
○ | main.extracted.104 | refwrap.h:347 | exec |
○ | __kmp_invoke_microtask | libiomp5.so | |
○ | __kmp_fork_call | libiomp5.so | |
○ | __kmpc_fork_call | libiomp5.so | |
○ | main | miniqmc.cpp:404 | exec |
○ | __libc_init_first | libc.so.6 |
Path / |
Metric | Value |
---|---|
CQA speedup if no scalar integer | 1.04 |
CQA speedup if FP arith vectorized | 1.70 |
CQA speedup if fully vectorized | 2.07 |
CQA speedup if no inter-iteration dependency | NA |
CQA speedup if next bottleneck killed | 1.28 |
Bottlenecks | P0, P1, |
Function | void qmcplusplus::DTD_BConds |
Source | ParticleBConds.h:185-217 |
Source loop unroll info | not unrolled or unrolled with no peel/tail loop |
Source loop unroll confidence level | max |
Unroll/vectorization loop type | NA |
Unroll factor | NA |
CQA cycles | 48.50 |
CQA cycles if no scalar integer | 46.50 |
CQA cycles if FP arith vectorized | 28.50 |
CQA cycles if fully vectorized | 23.44 |
Front-end cycles | 38.00 |
DIV/SQRT cycles | 9.00 - 12.00 |
P0 cycles | 48.50 |
P1 cycles | 48.50 |
P2 cycles | 23.00 |
P3 cycles | 23.00 |
P4 cycles | 4.00 |
P5 cycles | 14.00 |
P6 cycles | 3.00 |
P7 cycles | 4.00 |
Inter-iter dependencies cycles | 0 |
FE+BE cycles (UFS) | 101.52 - 104.51 |
Stall cycles (UFS) | 64.49 - 67.48 |
Nb insns | 141.00 |
Nb uops | 152.00 |
Nb loads | 37.00 |
Nb stores | 4.00 |
Nb stack references | 21.00 |
FLOP/cycle | 8.82 |
Nb FLOP add-sub | 108.00 |
Nb FLOP mul | 68.00 |
Nb FLOP fma | 124.00 |
Nb FLOP div | 0.00 |
Nb FLOP rcp | 0.00 |
Nb FLOP sqrt | 4.00 |
Nb FLOP rsqrt | 0.00 |
Bytes/cycle | 22.27 |
Bytes prefetched | 0.00 |
Bytes loaded | 952.00 |
Bytes stored | 128.00 |
Stride 0 | 9.00 |
Stride 1 | 0.00 |
Stride n | 0.00 |
Stride unknown | 0.00 |
Stride indirect | 1.00 |
Vectorization ratio all | 92.54 |
Vectorization ratio load | 78.38 |
Vectorization ratio store | 100.00 |
Vectorization ratio mul | 100.00 |
Vectorization ratio add_sub | 100.00 |
Vectorization ratio fma | 100.00 |
Vectorization ratio div_sqrt | 100.00 |
Vectorization ratio other | 80.00 |
Vector-efficiency ratio all | 46.22 |
Vector-efficiency ratio load | 41.22 |
Vector-efficiency ratio store | 50.00 |
Vector-efficiency ratio mul | 50.00 |
Vector-efficiency ratio add_sub | 49.11 |
Vector-efficiency ratio fma | 50.00 |
Vector-efficiency ratio div_sqrt | 50.00 |
Vector-efficiency ratio other | 40.38 |
Metric | Value |
---|---|
CQA speedup if no scalar integer | 1.04 |
CQA speedup if FP arith vectorized | 1.70 |
CQA speedup if fully vectorized | 2.07 |
CQA speedup if no inter-iteration dependency | NA |
CQA speedup if next bottleneck killed | 1.28 |
Bottlenecks | P0, P1, |
Function | void qmcplusplus::DTD_BConds |
Source | ParticleBConds.h:185-217 |
Source loop unroll info | not unrolled or unrolled with no peel/tail loop |
Source loop unroll confidence level | max |
Unroll/vectorization loop type | NA |
Unroll factor | NA |
CQA cycles | 48.50 |
CQA cycles if no scalar integer | 46.50 |
CQA cycles if FP arith vectorized | 28.50 |
CQA cycles if fully vectorized | 23.44 |
Front-end cycles | 38.00 |
DIV/SQRT cycles | 9.00 - 12.00 |
P0 cycles | 48.50 |
P1 cycles | 48.50 |
P2 cycles | 23.00 |
P3 cycles | 23.00 |
P4 cycles | 4.00 |
P5 cycles | 14.00 |
P6 cycles | 3.00 |
P7 cycles | 4.00 |
Inter-iter dependencies cycles | 0 |
FE+BE cycles (UFS) | 101.52 - 104.51 |
Stall cycles (UFS) | 64.49 - 67.48 |
Nb insns | 141.00 |
Nb uops | 152.00 |
Nb loads | 37.00 |
Nb stores | 4.00 |
Nb stack references | 21.00 |
FLOP/cycle | 8.82 |
Nb FLOP add-sub | 108.00 |
Nb FLOP mul | 68.00 |
Nb FLOP fma | 124.00 |
Nb FLOP div | 0.00 |
Nb FLOP rcp | 0.00 |
Nb FLOP sqrt | 4.00 |
Nb FLOP rsqrt | 0.00 |
Bytes/cycle | 22.27 |
Bytes prefetched | 0.00 |
Bytes loaded | 952.00 |
Bytes stored | 128.00 |
Stride 0 | 9.00 |
Stride 1 | 0.00 |
Stride n | 0.00 |
Stride unknown | 0.00 |
Stride indirect | 1.00 |
Vectorization ratio all | 92.54 |
Vectorization ratio load | 78.38 |
Vectorization ratio store | 100.00 |
Vectorization ratio mul | 100.00 |
Vectorization ratio add_sub | 100.00 |
Vectorization ratio fma | 100.00 |
Vectorization ratio div_sqrt | 100.00 |
Vectorization ratio other | 80.00 |
Vector-efficiency ratio all | 46.22 |
Vector-efficiency ratio load | 41.22 |
Vector-efficiency ratio store | 50.00 |
Vector-efficiency ratio mul | 50.00 |
Vector-efficiency ratio add_sub | 49.11 |
Vector-efficiency ratio fma | 50.00 |
Vector-efficiency ratio div_sqrt | 50.00 |
Vector-efficiency ratio other | 40.38 |
Path / |
Function | void qmcplusplus::DTD_BConds |
Source file and lines | ParticleBConds.h:185-217 |
Module | exec |
nb instructions | 141 |
nb uops | 152 |
loop length | 885 |
used x86 registers | 13 |
used mmx registers | 0 |
used xmm registers | 3 |
used ymm registers | 31 |
used zmm registers | 0 |
nb stack references | 21 |
ADD-SUB / MUL ratio | 1.59 |
micro-operation queue | 38.00 cycles |
front end | 38.00 cycles |
Port | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 |
---|---|---|---|---|---|---|---|---|
uops | 48.50 | 48.50 | 23.00 | 23.00 | 4.00 | 14.00 | 3.00 | 4.00 |
cycles | 48.50 | 48.50 | 23.00 | 23.00 | 4.00 | 14.00 | 3.00 | 4.00 |
Cycles executing div or sqrt instructions | 9.00-12.00 |
Longest recurrence chain latency (RecMII) | 0.00 |
FE+BE cycles | 101.52-104.51 |
Stall cycles | 64.49-67.48 |
RS full (events) | 1.04-0.98 |
PRF_FLOAT full (events) | 66.94-69.95 |
Front-end | 38.00 |
Dispatch | 48.50 |
DIV/SQRT | 9.00-12.00 |
Data deps. | 0.00 |
Overall L1 | 48.50 |
all | 20% |
load | 14% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 100% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 11% |
all | 98% |
load | 93% |
store | 100% |
mul | 100% |
add-sub | 100% |
fma | 100% |
div/sqrt | 100% |
other | 95% |
all | 92% |
load | 78% |
store | 100% |
mul | 100% |
add-sub | 100% |
fma | 100% |
div/sqrt | 100% |
other | 80% |
all | 14% |
load | 14% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 25% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 13% |
all | 48% |
load | 47% |
store | 50% |
mul | 50% |
add-sub | 50% |
fma | 50% |
div/sqrt | 50% |
other | 46% |
all | 46% |
load | 41% |
store | 50% |
mul | 50% |
add-sub | 49% |
fma | 50% |
div/sqrt | 50% |
other | 40% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|
LEA (%R9,%RDX,1),%EAX | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
VPBROADCASTD %EAX,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
VPADDD 0x3df4e(%RIP),%XMM3,%XMM3 | 1 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0.33 | 0 | 0 | 1 | 0.50 |
CLTQ | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.50 |
VPCMPGTD %XMM3,%XMM21,%K1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
VMOVUPD (%R13,%RAX,8),%YMM3 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VSUBPD 0x300(%RSP),%YMM3,%YMM3 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVUPD (%R12,%RAX,8),%YMM4 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VBROADCASTSD 0x37bd7(%RIP),%YMM14 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 0.50 |
VBROADCASTSD 0x37bc5(%RIP),%YMM20 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 0.50 |
VBLENDMPD %YMM20,%YMM14,%YMM25{%K1} | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 1 | 0.33 |
VMULPD %YMM25,%YMM3,%YMM28 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VSUBPD 0x340(%RSP),%YMM4,%YMM3 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVUPD (%RSI,%RAX,8),%YMM4 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VSUBPD 0x360(%RSP),%YMM4,%YMM4 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM25,%YMM3,%YMM26 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM25,%YMM4,%YMM3 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD 0x320(%RSP),%YMM28,%YMM4 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD 0x380(%RSP),%YMM26,%YMM4 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD 0x1a0(%RSP),%YMM3,%YMM4 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VRNDSCALEPD $0x9,%YMM4,%YMM27 | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 8 | 1 |
VMULPD 0x2c0(%RSP),%YMM28,%YMM4 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD 0x2e0(%RSP),%YMM26,%YMM4 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD 0x1c0(%RSP),%YMM3,%YMM4 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VROUNDPD $0x9,%YMM4,%YMM4 | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 8 | 1 |
VMULPD 0x1e0(%RSP),%YMM28,%YMM29 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD 0x200(%RSP),%YMM26,%YMM29 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD 0x3c0(%RSP),%YMM3,%YMM29 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VRNDSCALEPD $0x9,%YMM29,%YMM29 | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 8 | 1 |
VFMSUB231PD %YMM2,%YMM27,%YMM28 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD 0x180(%RSP),%YMM4,%YMM28 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFNMSUB231PD %YMM29,%YMM7,%YMM28 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMSUB231PD %YMM1,%YMM27,%YMM26 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD 0x160(%RSP),%YMM4,%YMM26 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMSUB213PD %YMM3,%YMM0,%YMM27 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFNMSUB231PD %YMM24,%YMM29,%YMM26 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD 0x3a0(%RSP),%YMM4,%YMM27 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFNMSUB231PD 0x3e0(%RSP),%YMM29,%YMM27 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM28,%YMM28,%YMM3 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM26,%YMM26,%YMM3 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDPD 0x420(%RSP),%YMM28,%YMM4 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM27,%YMM27,%YMM3 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDPD 0x400(%RSP),%YMM26,%YMM29 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD %YMM7,%YMM20 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMOVAPD %YMM31,%YMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VADDPD 0x460(%RSP),%YMM27,%YMM31 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM4,%YMM4,%YMM4 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM29,%YMM29,%YMM4 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDPD %YMM28,%YMM5,%YMM29 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM31,%YMM31,%YMM4 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDPD %YMM26,%YMM6,%YMM31 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD %YMM2,%YMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMOVAPD %YMM1,%YMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMOVAPD %YMM0,%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMOVAPD %YMM5,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VADDPD 0x440(%RSP),%YMM27,%YMM5 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM29,%YMM29,%YMM29 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM31,%YMM31,%YMM29 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM5,%YMM5,%YMM29 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMINPD %YMM3,%YMM4,%YMM5 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VCMPPD $0x1,%YMM5,%YMM29,%K1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VMINPD %YMM5,%YMM29,%YMM5 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDPD 0x120(%RSP),%YMM28,%YMM29 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDPD %YMM26,%YMM23,%YMM31 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM29,%YMM29,%YMM29 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD %YMM6,%YMM22 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VADDPD %YMM27,%YMM19,%YMM6 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM31,%YMM31,%YMM29 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM6,%YMM6,%YMM29 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VCMPPD $0x1,%YMM5,%YMM29,%K2 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VADDPD %YMM28,%YMM17,%YMM6 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDPD %YMM26,%YMM18,%YMM31 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMINPD %YMM5,%YMM29,%YMM5 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDPD %YMM27,%YMM16,%YMM29 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM6,%YMM6,%YMM6 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM31,%YMM31,%YMM6 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM29,%YMM29,%YMM6 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDPD %YMM28,%YMM13,%YMM29 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VCMPPD $0x1,%YMM5,%YMM6,%K3 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VADDPD %YMM26,%YMM15,%YMM31 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM29,%YMM29,%YMM29 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM31,%YMM31,%YMM29 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDPD %YMM27,%YMM30,%YMM31 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM31,%YMM31,%YMM29 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMINPD %YMM5,%YMM6,%YMM5 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDPD %YMM28,%YMM11,%YMM6 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDPD %YMM26,%YMM12,%YMM31 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM6,%YMM6,%YMM6 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM31,%YMM31,%YMM6 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD %YMM7,%YMM31 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMOVAPD %YMM20,%YMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VCMPPD $0x1,%YMM5,%YMM29,%K4 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VMINPD %YMM5,%YMM29,%YMM5 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDPD %YMM27,%YMM10,%YMM29 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM29,%YMM29,%YMM6 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VCMPPD $0x1,%YMM5,%YMM6,%K5 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VMINPD %YMM5,%YMM6,%YMM5 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDPD %YMM28,%YMM8,%YMM6 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDPD %YMM26,%YMM9,%YMM29 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM6,%YMM6,%YMM6 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM29,%YMM29,%YMM6 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDPD %YMM27,%YMM31,%YMM29 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM29,%YMM29,%YMM6 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VCMPPD $0x1,%YMM5,%YMM6,%K6 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VMINPD %YMM5,%YMM6,%YMM5 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD %YMM22,%YMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VCMPPD $0x1,%YMM3,%YMM4,%YMM3 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VSQRTPD %YMM5,%YMM4 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 13-19 | 9-12 |
VMOVAPD %YMM0,%YMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMOVAPD %YMM1,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMOVAPD %YMM2,%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMOVAPD %YMM14,%YMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VANDPD 0x37965(%RIP){1to4},%YMM3,%YMM3 | 1 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0.33 | 0 | 0 | 1 | 0.50 |
VPBROADCASTQ 0x3dc63(%RIP),%YMM3{%K1} | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VPBROADCASTQ 0x3dc61(%RIP),%YMM3{%K2} | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VPBROADCASTQ 0x3dc5f(%RIP),%YMM3{%K3} | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VPBROADCASTQ 0x3dc5d(%RIP),%YMM3{%K4} | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VPBROADCASTQ 0x3dc5b(%RIP),%YMM3{%K5} | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VPBROADCASTQ 0x3dc59(%RIP),%YMM3{%K6} | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VMOVUPD %YMM4,(%RCX,%RAX,8) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VXORPD %XMM4,%XMM4,%XMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VGATHERQPD 0x90(%RDI,%YMM3,8),%YMM4{%K1} | 4 | 1 | 0 | 2 | 2 | 0 | 1 | 0 | 0 | 20 | 4 |
VADDPD %YMM28,%YMM4,%YMM4 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM25,%YMM4,%YMM4 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VMOVUPD %YMM4,(%R10,%RAX,8) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VXORPD %XMM4,%XMM4,%XMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VGATHERQPD 0xd0(%RDI,%YMM3,8),%YMM4{%K1} | 4 | 1 | 0 | 2 | 2 | 0 | 1 | 0 | 0 | 20 | 4 |
VADDPD %YMM26,%YMM4,%YMM4 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM25,%YMM4,%YMM4 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VMOVUPD %YMM4,(%R8,%RAX,8) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VXORPD %XMM4,%XMM4,%XMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VGATHERQPD 0x110(%RDI,%YMM3,8),%YMM4{%K1} | 4 | 1 | 0 | 2 | 2 | 0 | 1 | 0 | 0 | 20 | 4 |
VADDPD %YMM27,%YMM4,%YMM3 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM25,%YMM3,%YMM3 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVUPD %YMM3,(%R11,%RAX,8) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
ADD $0x4,%EDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
CMP %EBX,%EDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JBE 45e420 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
Function | void qmcplusplus::DTD_BConds |
Source file and lines | ParticleBConds.h:185-217 |
Module | exec |
nb instructions | 141 |
nb uops | 152 |
loop length | 885 |
used x86 registers | 13 |
used mmx registers | 0 |
used xmm registers | 3 |
used ymm registers | 31 |
used zmm registers | 0 |
nb stack references | 21 |
ADD-SUB / MUL ratio | 1.59 |
micro-operation queue | 38.00 cycles |
front end | 38.00 cycles |
Port | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 |
---|---|---|---|---|---|---|---|---|
uops | 48.50 | 48.50 | 23.00 | 23.00 | 4.00 | 14.00 | 3.00 | 4.00 |
cycles | 48.50 | 48.50 | 23.00 | 23.00 | 4.00 | 14.00 | 3.00 | 4.00 |
Cycles executing div or sqrt instructions | 9.00-12.00 |
Longest recurrence chain latency (RecMII) | 0.00 |
FE+BE cycles | 101.52-104.51 |
Stall cycles | 64.49-67.48 |
RS full (events) | 1.04-0.98 |
PRF_FLOAT full (events) | 66.94-69.95 |
Front-end | 38.00 |
Dispatch | 48.50 |
DIV/SQRT | 9.00-12.00 |
Data deps. | 0.00 |
Overall L1 | 48.50 |
all | 20% |
load | 14% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 100% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 11% |
all | 98% |
load | 93% |
store | 100% |
mul | 100% |
add-sub | 100% |
fma | 100% |
div/sqrt | 100% |
other | 95% |
all | 92% |
load | 78% |
store | 100% |
mul | 100% |
add-sub | 100% |
fma | 100% |
div/sqrt | 100% |
other | 80% |
all | 14% |
load | 14% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 25% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 13% |
all | 48% |
load | 47% |
store | 50% |
mul | 50% |
add-sub | 50% |
fma | 50% |
div/sqrt | 50% |
other | 46% |
all | 46% |
load | 41% |
store | 50% |
mul | 50% |
add-sub | 49% |
fma | 50% |
div/sqrt | 50% |
other | 40% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|
LEA (%R9,%RDX,1),%EAX | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
VPBROADCASTD %EAX,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
VPADDD 0x3df4e(%RIP),%XMM3,%XMM3 | 1 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0.33 | 0 | 0 | 1 | 0.50 |
CLTQ | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.50 |
VPCMPGTD %XMM3,%XMM21,%K1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
VMOVUPD (%R13,%RAX,8),%YMM3 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VSUBPD 0x300(%RSP),%YMM3,%YMM3 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVUPD (%R12,%RAX,8),%YMM4 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VBROADCASTSD 0x37bd7(%RIP),%YMM14 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 0.50 |
VBROADCASTSD 0x37bc5(%RIP),%YMM20 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 0.50 |
VBLENDMPD %YMM20,%YMM14,%YMM25{%K1} | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 1 | 0.33 |
VMULPD %YMM25,%YMM3,%YMM28 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VSUBPD 0x340(%RSP),%YMM4,%YMM3 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVUPD (%RSI,%RAX,8),%YMM4 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VSUBPD 0x360(%RSP),%YMM4,%YMM4 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM25,%YMM3,%YMM26 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM25,%YMM4,%YMM3 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD 0x320(%RSP),%YMM28,%YMM4 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD 0x380(%RSP),%YMM26,%YMM4 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD 0x1a0(%RSP),%YMM3,%YMM4 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VRNDSCALEPD $0x9,%YMM4,%YMM27 | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 8 | 1 |
VMULPD 0x2c0(%RSP),%YMM28,%YMM4 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD 0x2e0(%RSP),%YMM26,%YMM4 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD 0x1c0(%RSP),%YMM3,%YMM4 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VROUNDPD $0x9,%YMM4,%YMM4 | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 8 | 1 |
VMULPD 0x1e0(%RSP),%YMM28,%YMM29 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD 0x200(%RSP),%YMM26,%YMM29 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD 0x3c0(%RSP),%YMM3,%YMM29 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VRNDSCALEPD $0x9,%YMM29,%YMM29 | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 8 | 1 |
VFMSUB231PD %YMM2,%YMM27,%YMM28 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD 0x180(%RSP),%YMM4,%YMM28 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFNMSUB231PD %YMM29,%YMM7,%YMM28 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMSUB231PD %YMM1,%YMM27,%YMM26 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD 0x160(%RSP),%YMM4,%YMM26 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMSUB213PD %YMM3,%YMM0,%YMM27 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFNMSUB231PD %YMM24,%YMM29,%YMM26 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD 0x3a0(%RSP),%YMM4,%YMM27 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFNMSUB231PD 0x3e0(%RSP),%YMM29,%YMM27 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM28,%YMM28,%YMM3 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM26,%YMM26,%YMM3 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDPD 0x420(%RSP),%YMM28,%YMM4 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM27,%YMM27,%YMM3 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDPD 0x400(%RSP),%YMM26,%YMM29 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD %YMM7,%YMM20 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMOVAPD %YMM31,%YMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VADDPD 0x460(%RSP),%YMM27,%YMM31 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM4,%YMM4,%YMM4 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM29,%YMM29,%YMM4 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDPD %YMM28,%YMM5,%YMM29 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM31,%YMM31,%YMM4 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDPD %YMM26,%YMM6,%YMM31 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD %YMM2,%YMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMOVAPD %YMM1,%YMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMOVAPD %YMM0,%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMOVAPD %YMM5,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VADDPD 0x440(%RSP),%YMM27,%YMM5 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM29,%YMM29,%YMM29 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM31,%YMM31,%YMM29 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM5,%YMM5,%YMM29 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMINPD %YMM3,%YMM4,%YMM5 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VCMPPD $0x1,%YMM5,%YMM29,%K1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VMINPD %YMM5,%YMM29,%YMM5 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDPD 0x120(%RSP),%YMM28,%YMM29 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDPD %YMM26,%YMM23,%YMM31 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM29,%YMM29,%YMM29 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD %YMM6,%YMM22 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VADDPD %YMM27,%YMM19,%YMM6 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM31,%YMM31,%YMM29 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM6,%YMM6,%YMM29 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VCMPPD $0x1,%YMM5,%YMM29,%K2 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VADDPD %YMM28,%YMM17,%YMM6 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDPD %YMM26,%YMM18,%YMM31 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMINPD %YMM5,%YMM29,%YMM5 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDPD %YMM27,%YMM16,%YMM29 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM6,%YMM6,%YMM6 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM31,%YMM31,%YMM6 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM29,%YMM29,%YMM6 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDPD %YMM28,%YMM13,%YMM29 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VCMPPD $0x1,%YMM5,%YMM6,%K3 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VADDPD %YMM26,%YMM15,%YMM31 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM29,%YMM29,%YMM29 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM31,%YMM31,%YMM29 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDPD %YMM27,%YMM30,%YMM31 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM31,%YMM31,%YMM29 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMINPD %YMM5,%YMM6,%YMM5 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDPD %YMM28,%YMM11,%YMM6 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDPD %YMM26,%YMM12,%YMM31 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM6,%YMM6,%YMM6 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM31,%YMM31,%YMM6 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD %YMM7,%YMM31 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMOVAPD %YMM20,%YMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VCMPPD $0x1,%YMM5,%YMM29,%K4 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VMINPD %YMM5,%YMM29,%YMM5 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDPD %YMM27,%YMM10,%YMM29 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM29,%YMM29,%YMM6 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VCMPPD $0x1,%YMM5,%YMM6,%K5 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VMINPD %YMM5,%YMM6,%YMM5 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDPD %YMM28,%YMM8,%YMM6 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDPD %YMM26,%YMM9,%YMM29 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM6,%YMM6,%YMM6 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM29,%YMM29,%YMM6 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDPD %YMM27,%YMM31,%YMM29 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM29,%YMM29,%YMM6 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VCMPPD $0x1,%YMM5,%YMM6,%K6 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VMINPD %YMM5,%YMM6,%YMM5 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD %YMM22,%YMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VCMPPD $0x1,%YMM3,%YMM4,%YMM3 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VSQRTPD %YMM5,%YMM4 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 13-19 | 9-12 |
VMOVAPD %YMM0,%YMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMOVAPD %YMM1,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMOVAPD %YMM2,%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMOVAPD %YMM14,%YMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VANDPD 0x37965(%RIP){1to4},%YMM3,%YMM3 | 1 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0.33 | 0 | 0 | 1 | 0.50 |
VPBROADCASTQ 0x3dc63(%RIP),%YMM3{%K1} | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VPBROADCASTQ 0x3dc61(%RIP),%YMM3{%K2} | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VPBROADCASTQ 0x3dc5f(%RIP),%YMM3{%K3} | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VPBROADCASTQ 0x3dc5d(%RIP),%YMM3{%K4} | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VPBROADCASTQ 0x3dc5b(%RIP),%YMM3{%K5} | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VPBROADCASTQ 0x3dc59(%RIP),%YMM3{%K6} | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VMOVUPD %YMM4,(%RCX,%RAX,8) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VXORPD %XMM4,%XMM4,%XMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VGATHERQPD 0x90(%RDI,%YMM3,8),%YMM4{%K1} | 4 | 1 | 0 | 2 | 2 | 0 | 1 | 0 | 0 | 20 | 4 |
VADDPD %YMM28,%YMM4,%YMM4 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM25,%YMM4,%YMM4 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VMOVUPD %YMM4,(%R10,%RAX,8) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VXORPD %XMM4,%XMM4,%XMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VGATHERQPD 0xd0(%RDI,%YMM3,8),%YMM4{%K1} | 4 | 1 | 0 | 2 | 2 | 0 | 1 | 0 | 0 | 20 | 4 |
VADDPD %YMM26,%YMM4,%YMM4 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM25,%YMM4,%YMM4 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VMOVUPD %YMM4,(%R8,%RAX,8) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VXORPD %XMM4,%XMM4,%XMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VGATHERQPD 0x110(%RDI,%YMM3,8),%YMM4{%K1} | 4 | 1 | 0 | 2 | 2 | 0 | 1 | 0 | 0 | 20 | 4 |
VADDPD %YMM27,%YMM4,%YMM3 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM25,%YMM3,%YMM3 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVUPD %YMM3,(%R11,%RAX,8) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
ADD $0x4,%EDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
CMP %EBX,%EDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JBE 45e420 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
Metric | run_0 |
---|---|
Coverage (% app. time) | 18.02 |
Time (s) | 12.06 |
Instance Count | 374184 |
Iteration Count - min | 128 |
Iteration Count - avg | 843.56 |
Iteration Count - max | 1536 |
Cycles per Iteration - min | 78.9 |
Cycles per Iteration - avg | 81.25 |
Cycles per Iteration - max | 6078.7 |