Loop Id: 1149 | Module: exec | Source: DistanceTableBA.h:99-101 [...] | Coverage: 0.11% |
---|
Loop Id: 1149 | Module: exec | Source: DistanceTableBA.h:99-101 [...] | Coverage: 0.11% |
---|
0x498700 MOVSXD %ECX,%RDX |
0x498703 VMOVSD %XMM10,%XMM10,%XMM5 |
0x498707 TEST %ECX,%ECX |
0x498709 JNS 498717 |
0x49870b MOV 0x378ce(%RIP),%RAX [6] |
0x498712 VMOVQ %RAX,%XMM5 |
0x498717 LEA (%RDX,%R14,1),%RDI |
0x49871b VMOVSD (%R15,%RDX,8),%XMM2 [8] |
0x498721 LEA (%R11,%RDX,1),%RSI |
0x498725 VMOVSD (%R15,%RDI,8),%XMM4 [7] |
0x49872b VMOVSD (%R15,%RSI,8),%XMM11 [3] |
0x498731 VSUBSD %XMM9,%XMM2,%XMM3 |
0x498736 VSUBSD %XMM8,%XMM4,%XMM6 |
0x49873b VSUBSD %XMM7,%XMM11,%XMM0 |
0x49873f VMULSD %XMM5,%XMM3,%XMM3 |
0x498743 VMULSD %XMM5,%XMM6,%XMM2 |
0x498747 VMULSD %XMM5,%XMM0,%XMM0 |
0x49874b VMULSD %XMM27,%XMM2,%XMM4 |
0x498751 VMULSD %XMM30,%XMM2,%XMM1 |
0x498757 VFMADD231SD %XMM28,%XMM3,%XMM4 |
0x49875d VFMADD231SD %XMM31,%XMM3,%XMM1 |
0x498763 VFMADD231SD %XMM26,%XMM0,%XMM4 |
0x498769 VFMADD231SD %XMM29,%XMM0,%XMM1 |
0x49876f VRNDSCALESD $0x9,%XMM4,%XMM4,%XMM6 |
0x498776 VMULSD %XMM24,%XMM2,%XMM4 |
0x49877c VRNDSCALESD $0x9,%XMM1,%XMM1,%XMM11 |
0x498783 VXORPD 0x36525(%RIP),%XMM6,%XMM1 [6] |
0x49878b VMULSD %XMM21,%XMM1,%XMM6 |
0x498791 VFMADD231SD %XMM25,%XMM3,%XMM4 |
0x498797 VFNMADD231SD %XMM22,%XMM11,%XMM6 |
0x49879d VFMADD231SD %XMM23,%XMM0,%XMM4 |
0x4987a3 VRNDSCALESD $0x9,%XMM4,%XMM4,%XMM4 |
0x4987aa VFNMADD231SD %XMM20,%XMM4,%XMM3 |
0x4987b0 VFNMADD231SD %XMM17,%XMM4,%XMM2 |
0x4987b6 VFNMADD231SD %XMM14,%XMM4,%XMM0 |
0x4987bb VADDSD %XMM3,%XMM6,%XMM3 |
0x4987bf VMULSD %XMM18,%XMM1,%XMM6 |
0x4987c5 VMULSD %XMM15,%XMM1,%XMM1 |
0x4987ca VFNMADD231SD %XMM19,%XMM11,%XMM6 |
0x4987d0 VFNMADD231SD %XMM16,%XMM11,%XMM1 |
0x4987d6 VADDSD %XMM13,%XMM3,%XMM11 |
0x4987db VADDSD %XMM2,%XMM6,%XMM2 |
0x4987df VADDSD %XMM0,%XMM1,%XMM0 |
0x4987e3 VADDSD %XMM12,%XMM2,%XMM6 |
0x4987e8 VMULSD %XMM2,%XMM2,%XMM4 |
0x4987ec VADDSD -0x30(%RBP),%XMM0,%XMM1 [9] |
0x4987f1 VMULSD %XMM6,%XMM6,%XMM6 |
0x4987f5 VFMADD231SD %XMM3,%XMM3,%XMM4 |
0x4987fa VFMADD132SD %XMM11,%XMM6,%XMM11 |
0x4987ff VADDSD -0xc0(%RBP),%XMM0,%XMM6 [9] |
0x498807 VFMADD231SD %XMM0,%XMM0,%XMM4 |
0x49880c VFMADD132SD %XMM1,%XMM11,%XMM1 |
0x498811 VADDSD -0xa0(%RBP),%XMM2,%XMM11 [9] |
0x498819 VMULSD %XMM11,%XMM11,%XMM11 |
0x49881e VCOMISD %XMM1,%XMM4 |
0x498822 VMINSD %XMM1,%XMM4,%XMM4 |
0x498826 VADDSD -0x98(%RBP),%XMM3,%XMM1 [9] |
0x49882e VFMADD132SD %XMM1,%XMM11,%XMM1 |
0x498833 VADDSD -0x88(%RBP),%XMM2,%XMM11 [9] |
0x49883b SETA %R10B |
0x49883f MOVZX %R10B,%EAX |
0x498843 MOV $0x2,%R10D |
0x498849 VMULSD %XMM11,%XMM11,%XMM11 |
0x49884e VFMADD231SD %XMM6,%XMM6,%XMM1 |
0x498853 VADDSD -0x80(%RBP),%XMM0,%XMM6 [9] |
0x498858 VCOMISD %XMM1,%XMM4 |
0x49885c VMINSD %XMM4,%XMM1,%XMM1 |
0x498860 VADDSD -0x90(%RBP),%XMM3,%XMM4 [9] |
0x498868 VFMADD132SD %XMM4,%XMM11,%XMM4 |
0x49886d VADDSD -0x70(%RBP),%XMM2,%XMM11 [9] |
0x498872 CMOVA %R10,%RAX |
0x498876 MOV $0x3,%R10D |
0x49887c VMULSD %XMM11,%XMM11,%XMM11 |
0x498881 VFMADD231SD %XMM6,%XMM6,%XMM4 |
0x498886 VADDSD -0x68(%RBP),%XMM0,%XMM6 [9] |
0x49888b VCOMISD %XMM4,%XMM1 |
0x49888f VMINSD %XMM1,%XMM4,%XMM4 |
0x498893 VADDSD -0x78(%RBP),%XMM3,%XMM1 [9] |
0x498898 VFMADD132SD %XMM1,%XMM11,%XMM1 |
0x49889d VADDSD -0x58(%RBP),%XMM2,%XMM11 [9] |
0x4988a2 CMOVA %R10,%RAX |
0x4988a6 MOV $0x4,%R10D |
0x4988ac VMULSD %XMM11,%XMM11,%XMM11 |
0x4988b1 VFMADD231SD %XMM6,%XMM6,%XMM1 |
0x4988b6 VADDSD -0x50(%RBP),%XMM0,%XMM6 [9] |
0x4988bb VCOMISD %XMM1,%XMM4 |
0x4988bf VMINSD %XMM4,%XMM1,%XMM1 |
0x4988c3 VADDSD -0x60(%RBP),%XMM3,%XMM4 [9] |
0x4988c8 VFMADD132SD %XMM4,%XMM11,%XMM4 |
0x4988cd VADDSD -0x40(%RBP),%XMM2,%XMM11 [9] |
0x4988d2 CMOVA %R10,%RAX |
0x4988d6 MOV $0x5,%R10D |
0x4988dc VMULSD %XMM11,%XMM11,%XMM11 |
0x4988e1 VFMADD231SD %XMM6,%XMM6,%XMM4 |
0x4988e6 VADDSD -0x38(%RBP),%XMM0,%XMM6 [9] |
0x4988eb VCOMISD %XMM4,%XMM1 |
0x4988ef VMINSD %XMM1,%XMM4,%XMM4 |
0x4988f3 VADDSD -0x48(%RBP),%XMM3,%XMM1 [9] |
0x4988f8 VFMADD132SD %XMM1,%XMM11,%XMM1 |
0x4988fd VADDSD -0xb0(%RBP),%XMM2,%XMM11 [9] |
0x498905 CMOVA %R10,%RAX |
0x498909 MOV $0x6,%R10D |
0x49890f VMULSD %XMM11,%XMM11,%XMM11 |
0x498914 VFMADD231SD %XMM6,%XMM6,%XMM1 |
0x498919 VADDSD -0xa8(%RBP),%XMM0,%XMM6 [9] |
0x498921 VCOMISD %XMM1,%XMM4 |
0x498925 VMINSD %XMM4,%XMM1,%XMM1 |
0x498929 VADDSD -0xb8(%RBP),%XMM3,%XMM4 [9] |
0x498931 VFMADD132SD %XMM4,%XMM11,%XMM4 |
0x498936 CMOVA %R10,%RAX |
0x49893a MOV $0x7,%R10D |
0x498940 VFMADD231SD %XMM6,%XMM6,%XMM4 |
0x498945 VCOMISD %XMM4,%XMM1 |
0x498949 VMINSD %XMM4,%XMM1,%XMM1 |
0x49894d CMOVA %R10,%RAX |
0x498951 VSQRTSD %XMM1,%XMM1,%XMM1 |
0x498955 LEA (%RBX,%RDX,8),%R10 |
0x498959 INC %RCX |
0x49895c LEA (%R9,%RAX,8),%RAX |
0x498960 VADDSD 0x180(%RAX),%XMM3,%XMM3 [1] |
0x498968 VADDSD 0x1c0(%RAX),%XMM2,%XMM2 [1] |
0x498970 VMOVSD %XMM1,(%R10,%R13,1) [2] |
0x498976 VADDSD 0x200(%RAX),%XMM0,%XMM0 [1] |
0x49897e VMULSD %XMM5,%XMM3,%XMM4 |
0x498982 VMULSD %XMM5,%XMM2,%XMM6 |
0x498986 VMULSD %XMM5,%XMM0,%XMM5 |
0x49898a VMOVSD %XMM4,(%R12,%RDX,8) [5] |
0x498990 VMOVSD %XMM6,(%R12,%RDI,8) [10] |
0x498996 VMOVSD %XMM5,(%R12,%RSI,8) [4] |
0x49899c CMP %R8,%RCX |
0x49899f JNE 498700 |
/home/kcamus/qaas_runs/169-451-1869/intel/miniqmc/build/miniqmc/src/Particle/Lattice/ParticleBConds.h: 249 - 278 |
-------------------------------------------------------------------------------- |
249: const T flip = iat < flip_ind ? one : minusone; |
250: const T displ_0 = (px[iat] - x0) * flip; |
251: const T displ_1 = (py[iat] - y0) * flip; |
252: const T displ_2 = (pz[iat] - z0) * flip; |
253: |
254: const T ar_0 = -std::floor(displ_0 * g00 + displ_1 * g10 + displ_2 * g20); |
255: const T ar_1 = -std::floor(displ_0 * g01 + displ_1 * g11 + displ_2 * g21); |
256: const T ar_2 = -std::floor(displ_0 * g02 + displ_1 * g12 + displ_2 * g22); |
257: |
258: const T delx = displ_0 + ar_0 * r00 + ar_1 * r10 + ar_2 * r20; |
259: const T dely = displ_1 + ar_0 * r01 + ar_1 * r11 + ar_2 * r21; |
260: const T delz = displ_2 + ar_0 * r02 + ar_1 * r12 + ar_2 * r22; |
261: |
262: T rmin = delx * delx + dely * dely + delz * delz; |
263: int ic = 0; |
264: #pragma unroll(7) |
265: for (int c = 1; c < 8; ++c) |
266: { |
267: const T x = delx + cellx[c]; |
268: const T y = dely + celly[c]; |
269: const T z = delz + cellz[c]; |
270: const T r2 = x * x + y * y + z * z; |
271: ic = (r2 < rmin) ? c : ic; |
272: rmin = (r2 < rmin) ? r2 : rmin; |
273: } |
274: |
275: temp_r[iat] = std::sqrt(rmin); |
276: dx[iat] = flip * (delx + cellx[ic]); |
277: dy[iat] = flip * (dely + celly[ic]); |
278: dz[iat] = flip * (delz + cellz[ic]); |
/home/kcamus/qaas_runs/169-451-1869/intel/miniqmc/build/miniqmc/src/Particle/DistanceTableBA.h: 99 - 101 |
-------------------------------------------------------------------------------- |
99: for (size_t jel = first; jel < last; ++jel) |
100: { |
101: DTD_BConds<T, D, SC>::computeDistancesOffload(pos, src_ptr, nsrc_padded, dist_ptr + nsrc_padded * iat, |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►100.00+ | qmcplusplus::ParticleSet::upda[...] | ParticleSet.cpp:250 | exec |
○ | main._omp_fn.0 | miniqmc.cpp:390 | exec |
○ | GOMP_parallel | libgomp.h:985 | libgomp.so.1.0.0 |
Path / |
Metric | Value |
---|---|
CQA speedup if no scalar integer | 1.02 |
CQA speedup if FP arith vectorized | 3.35 |
CQA speedup if fully vectorized | 8.08 |
CQA speedup if no inter-iteration dependency | NA |
CQA speedup if next bottleneck killed | 1.19 |
Bottlenecks | |
Function | qmcplusplus::DistanceTableBA |
Source | ParticleBConds.h:249-278,DistanceTableBA.h:99-101 |
Source loop unroll info | not unrolled or unrolled with no peel/tail loop |
Source loop unroll confidence level | max |
Unroll/vectorization loop type | NA |
Unroll factor | NA |
CQA cycles | 50.50 |
CQA cycles if no scalar integer | 49.50 |
CQA cycles if FP arith vectorized | 15.06 |
CQA cycles if fully vectorized | 6.25 |
Front-end cycles | 34.50 |
P0 cycles | 50.50 |
P1 cycles | 50.25 |
P2 cycles | 13.25 |
P3 cycles | 13.25 |
P4 cycles | 4.00 |
P5 cycles | 13.75 |
P6 cycles | 15.00 |
P7 cycles | 4.00 |
DIV/SQRT cycles | 4.50 - 6.00 |
Inter-iter dependencies cycles | 0 |
FE+BE cycles (UFS) | 65.19 - 65.20 |
Stall cycles (UFS) | 30.22 - 30.22 |
Nb insns | 130.00 |
Nb uops | 138.00 |
Nb loads | 26.50 |
Nb stores | 4.00 |
Nb stack references | 19.00 |
FLOP/cycle | 2.12 |
Nb FLOP add-sub | 30.00 |
Nb FLOP mul | 20.00 |
Nb FLOP fma | 28.00 |
Nb FLOP div | 0.00 |
Nb FLOP rcp | 0.00 |
Nb FLOP sqrt | 1.00 |
Nb FLOP rsqrt | 0.00 |
Bytes/cycle | 4.99 |
Bytes prefetched | 0.00 |
Bytes loaded | 220.00 |
Bytes stored | 32.00 |
Stride 0 | 2.00 |
Stride 1 | 0.00 |
Stride n | 0.00 |
Stride unknown | 6.00 |
Stride indirect | 2.00 |
Vectorization ratio all | 0.94 |
Vectorization ratio load | 3.85 |
Vectorization ratio store | 0.00 |
Vectorization ratio mul | 0.00 |
Vectorization ratio add_sub | 0.00 |
Vectorization ratio fma | 0.00 |
Vectorization ratio div_sqrt | 0.00 |
Vectorization ratio other | 4.88 |
Vector-efficiency ratio all | 12.56 |
Vector-efficiency ratio load | 12.98 |
Vector-efficiency ratio store | 12.50 |
Vector-efficiency ratio mul | 12.50 |
Vector-efficiency ratio add_sub | 12.50 |
Vector-efficiency ratio fma | 12.50 |
Vector-efficiency ratio div_sqrt | 12.50 |
Vector-efficiency ratio other | 12.81 |
Metric | Value |
---|---|
CQA speedup if no scalar integer | 1.02 |
CQA speedup if FP arith vectorized | 3.35 |
CQA speedup if fully vectorized | 8.08 |
CQA speedup if no inter-iteration dependency | NA |
CQA speedup if next bottleneck killed | 1.45 |
Bottlenecks | P0, P1, |
Function | qmcplusplus::DistanceTableBA |
Source | ParticleBConds.h:249-278,DistanceTableBA.h:99-101 |
Source loop unroll info | not unrolled or unrolled with no peel/tail loop |
Source loop unroll confidence level | max |
Unroll/vectorization loop type | NA |
Unroll factor | NA |
CQA cycles | 50.50 |
CQA cycles if no scalar integer | 49.50 |
CQA cycles if FP arith vectorized | 15.06 |
CQA cycles if fully vectorized | 6.25 |
Front-end cycles | 34.75 |
P0 cycles | 50.50 |
P1 cycles | 50.50 |
P2 cycles | 13.50 |
P3 cycles | 13.50 |
P4 cycles | 4.00 |
P5 cycles | 14.00 |
P6 cycles | 15.00 |
P7 cycles | 4.00 |
DIV/SQRT cycles | 4.50 - 6.00 |
Inter-iter dependencies cycles | 0 |
FE+BE cycles (UFS) | 65.20 - 65.76 |
Stall cycles (UFS) | 29.96 - 30.51 |
Nb insns | 131.00 |
Nb uops | 139.00 |
Nb loads | 27.00 |
Nb stores | 4.00 |
Nb stack references | 19.00 |
FLOP/cycle | 2.12 |
Nb FLOP add-sub | 30.00 |
Nb FLOP mul | 20.00 |
Nb FLOP fma | 28.00 |
Nb FLOP div | 0.00 |
Nb FLOP rcp | 0.00 |
Nb FLOP sqrt | 1.00 |
Nb FLOP rsqrt | 0.00 |
Bytes/cycle | 5.07 |
Bytes prefetched | 0.00 |
Bytes loaded | 224.00 |
Bytes stored | 32.00 |
Stride 0 | 2.00 |
Stride 1 | 0.00 |
Stride n | 0.00 |
Stride unknown | 6.00 |
Stride indirect | 2.00 |
Vectorization ratio all | 0.93 |
Vectorization ratio load | 3.85 |
Vectorization ratio store | 0.00 |
Vectorization ratio mul | 0.00 |
Vectorization ratio add_sub | 0.00 |
Vectorization ratio fma | 0.00 |
Vectorization ratio div_sqrt | 0.00 |
Vectorization ratio other | 4.76 |
Vector-efficiency ratio all | 12.56 |
Vector-efficiency ratio load | 12.98 |
Vector-efficiency ratio store | 12.50 |
Vector-efficiency ratio mul | 12.50 |
Vector-efficiency ratio add_sub | 12.50 |
Vector-efficiency ratio fma | 12.50 |
Vector-efficiency ratio div_sqrt | 12.50 |
Vector-efficiency ratio other | 12.80 |
Metric | Value |
---|---|
CQA speedup if no scalar integer | 1.02 |
CQA speedup if FP arith vectorized | 3.35 |
CQA speedup if fully vectorized | 8.08 |
CQA speedup if no inter-iteration dependency | NA |
CQA speedup if next bottleneck killed | 1.01 |
Bottlenecks | P0, |
Function | qmcplusplus::DistanceTableBA |
Source | ParticleBConds.h:249-278,DistanceTableBA.h:99-101 |
Source loop unroll info | not unrolled or unrolled with no peel/tail loop |
Source loop unroll confidence level | max |
Unroll/vectorization loop type | NA |
Unroll factor | NA |
CQA cycles | 50.50 |
CQA cycles if no scalar integer | 49.50 |
CQA cycles if FP arith vectorized | 15.06 |
CQA cycles if fully vectorized | 6.25 |
Front-end cycles | 34.25 |
P0 cycles | 50.50 |
P1 cycles | 50.00 |
P2 cycles | 13.00 |
P3 cycles | 13.00 |
P4 cycles | 4.00 |
P5 cycles | 13.50 |
P6 cycles | 15.00 |
P7 cycles | 4.00 |
DIV/SQRT cycles | 4.50 - 6.00 |
Inter-iter dependencies cycles | 0 |
FE+BE cycles (UFS) | 65.19 - 64.64 |
Stall cycles (UFS) | 30.48 - 29.93 |
Nb insns | 129.00 |
Nb uops | 137.00 |
Nb loads | 26.00 |
Nb stores | 4.00 |
Nb stack references | 19.00 |
FLOP/cycle | 2.12 |
Nb FLOP add-sub | 30.00 |
Nb FLOP mul | 20.00 |
Nb FLOP fma | 28.00 |
Nb FLOP div | 0.00 |
Nb FLOP rcp | 0.00 |
Nb FLOP sqrt | 1.00 |
Nb FLOP rsqrt | 0.00 |
Bytes/cycle | 4.91 |
Bytes prefetched | 0.00 |
Bytes loaded | 216.00 |
Bytes stored | 32.00 |
Stride 0 | 2.00 |
Stride 1 | 0.00 |
Stride n | 0.00 |
Stride unknown | 6.00 |
Stride indirect | 2.00 |
Vectorization ratio all | 0.94 |
Vectorization ratio load | 3.85 |
Vectorization ratio store | 0.00 |
Vectorization ratio mul | 0.00 |
Vectorization ratio add_sub | 0.00 |
Vectorization ratio fma | 0.00 |
Vectorization ratio div_sqrt | 0.00 |
Vectorization ratio other | 5.00 |
Vector-efficiency ratio all | 12.56 |
Vector-efficiency ratio load | 12.98 |
Vector-efficiency ratio store | 12.50 |
Vector-efficiency ratio mul | 12.50 |
Vector-efficiency ratio add_sub | 12.50 |
Vector-efficiency ratio fma | 12.50 |
Vector-efficiency ratio div_sqrt | 12.50 |
Vector-efficiency ratio other | 12.81 |
Path / |
Function | qmcplusplus::DistanceTableBA |
Source file and lines | DistanceTableBA.h:99-101 |
Module | exec |
nb instructions | 130 |
nb uops | 138 |
loop length | 671 |
used x86 registers | 15 |
used mmx registers | 0 |
used xmm registers | 32 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 19 |
ADD-SUB / MUL ratio | 1.50 |
micro-operation queue | 34.50 cycles |
front end | 34.50 cycles |
Port | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 |
---|---|---|---|---|---|---|---|---|
uops | 50.50 | 50.25 | 13.25 | 13.25 | 4.00 | 13.75 | 15.00 | 4.00 |
cycles | 50.50 | 50.25 | 13.25 | 13.25 | 4.00 | 13.75 | 15.00 | 4.00 |
Cycles executing div or sqrt instructions | 4.50-6.00 |
Longest recurrence chain latency (RecMII) | 0.00 |
FE+BE cycles | 65.19-65.20 |
Stall cycles | 30.22-30.22 |
RS full (events) | 61.26-61.03 |
Front-end | 34.50 |
Dispatch | 50.50 |
DIV/SQRT | 4.50-6.00 |
Data deps. | 0.00 |
Overall L1 | 50.50 |
all | 0% |
load | NA (no load vectorizable/vectorized instructions) |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 0% |
all | 0% |
load | 3% |
store | 0% |
mul | 0% |
add-sub | 0% |
fma | 0% |
div/sqrt | 0% |
other | 5% |
all | 0% |
load | 3% |
store | 0% |
mul | 0% |
add-sub | 0% |
fma | 0% |
div/sqrt | 0% |
other | 4% |
all | 7% |
load | NA (no load vectorizable/vectorized instructions) |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 7% |
all | 12% |
load | 12% |
store | 12% |
mul | 12% |
add-sub | 12% |
fma | 12% |
div/sqrt | 12% |
other | 13% |
all | 12% |
load | 12% |
store | 12% |
mul | 12% |
add-sub | 12% |
fma | 12% |
div/sqrt | 12% |
other | 12% |
Function | qmcplusplus::DistanceTableBA |
Source file and lines | DistanceTableBA.h:99-101 |
Module | exec |
nb instructions | 131 |
nb uops | 139 |
loop length | 677 |
used x86 registers | 15 |
used mmx registers | 0 |
used xmm registers | 32 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 19 |
ADD-SUB / MUL ratio | 1.50 |
micro-operation queue | 34.75 cycles |
front end | 34.75 cycles |
Port | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 |
---|---|---|---|---|---|---|---|---|
uops | 50.50 | 50.50 | 13.50 | 13.50 | 4.00 | 14.00 | 15.00 | 4.00 |
cycles | 50.50 | 50.50 | 13.50 | 13.50 | 4.00 | 14.00 | 15.00 | 4.00 |
Cycles executing div or sqrt instructions | 4.50-6.00 |
Longest recurrence chain latency (RecMII) | 0.00 |
FE+BE cycles | 65.20-65.76 |
Stall cycles | 29.96-30.51 |
RS full (events) | 61.24-61.49 |
Front-end | 34.75 |
Dispatch | 50.50 |
DIV/SQRT | 4.50-6.00 |
Data deps. | 0.00 |
Overall L1 | 50.50 |
all | 0% |
load | NA (no load vectorizable/vectorized instructions) |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 0% |
all | 0% |
load | 3% |
store | 0% |
mul | 0% |
add-sub | 0% |
fma | 0% |
div/sqrt | 0% |
other | 5% |
all | 0% |
load | 3% |
store | 0% |
mul | 0% |
add-sub | 0% |
fma | 0% |
div/sqrt | 0% |
other | 4% |
all | 9% |
load | NA (no load vectorizable/vectorized instructions) |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 9% |
all | 12% |
load | 12% |
store | 12% |
mul | 12% |
add-sub | 12% |
fma | 12% |
div/sqrt | 12% |
other | 13% |
all | 12% |
load | 12% |
store | 12% |
mul | 12% |
add-sub | 12% |
fma | 12% |
div/sqrt | 12% |
other | 12% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|
MOVSXD %ECX,%RDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VMOVSD %XMM10,%XMM10,%XMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
TEST %ECX,%ECX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JNS 498717 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOV 0x378ce(%RIP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVQ %RAX,%XMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
LEA (%RDX,%R14,1),%RDI | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
VMOVSD (%R15,%RDX,8),%XMM2 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
LEA (%R11,%RDX,1),%RSI | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
VMOVSD (%R15,%RDI,8),%XMM4 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD (%R15,%RSI,8),%XMM11 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VSUBSD %XMM9,%XMM2,%XMM3 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VSUBSD %XMM8,%XMM4,%XMM6 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VSUBSD %XMM7,%XMM11,%XMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULSD %XMM5,%XMM3,%XMM3 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULSD %XMM5,%XMM6,%XMM2 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULSD %XMM5,%XMM0,%XMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULSD %XMM27,%XMM2,%XMM4 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULSD %XMM30,%XMM2,%XMM1 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231SD %XMM28,%XMM3,%XMM4 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231SD %XMM31,%XMM3,%XMM1 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231SD %XMM26,%XMM0,%XMM4 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231SD %XMM29,%XMM0,%XMM1 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VRNDSCALESD $0x9,%XMM4,%XMM4,%XMM6 | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 8 | 1 |
VMULSD %XMM24,%XMM2,%XMM4 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VRNDSCALESD $0x9,%XMM1,%XMM1,%XMM11 | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 8 | 1 |
VXORPD 0x36525(%RIP),%XMM6,%XMM1 | 1 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0.33 | 0 | 0 | 1 | 0.50 |
VMULSD %XMM21,%XMM1,%XMM6 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231SD %XMM25,%XMM3,%XMM4 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFNMADD231SD %XMM22,%XMM11,%XMM6 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231SD %XMM23,%XMM0,%XMM4 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VRNDSCALESD $0x9,%XMM4,%XMM4,%XMM4 | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 8 | 1 |
VFNMADD231SD %XMM20,%XMM4,%XMM3 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFNMADD231SD %XMM17,%XMM4,%XMM2 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFNMADD231SD %XMM14,%XMM4,%XMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDSD %XMM3,%XMM6,%XMM3 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULSD %XMM18,%XMM1,%XMM6 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULSD %XMM15,%XMM1,%XMM1 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFNMADD231SD %XMM19,%XMM11,%XMM6 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFNMADD231SD %XMM16,%XMM11,%XMM1 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDSD %XMM13,%XMM3,%XMM11 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDSD %XMM2,%XMM6,%XMM2 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDSD %XMM0,%XMM1,%XMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDSD %XMM12,%XMM2,%XMM6 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULSD %XMM2,%XMM2,%XMM4 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDSD -0x30(%RBP),%XMM0,%XMM1 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULSD %XMM6,%XMM6,%XMM6 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231SD %XMM3,%XMM3,%XMM4 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD132SD %XMM11,%XMM6,%XMM11 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDSD -0xc0(%RBP),%XMM0,%XMM6 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231SD %XMM0,%XMM0,%XMM4 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD132SD %XMM1,%XMM11,%XMM1 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDSD -0xa0(%RBP),%XMM2,%XMM11 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULSD %XMM11,%XMM11,%XMM11 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VCOMISD %XMM1,%XMM4 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMINSD %XMM1,%XMM4,%XMM4 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDSD -0x98(%RBP),%XMM3,%XMM1 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD132SD %XMM1,%XMM11,%XMM1 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDSD -0x88(%RBP),%XMM2,%XMM11 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
SETA %R10B | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 |
MOVZX %R10B,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV $0x2,%R10D | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VMULSD %XMM11,%XMM11,%XMM11 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231SD %XMM6,%XMM6,%XMM1 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDSD -0x80(%RBP),%XMM0,%XMM6 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VCOMISD %XMM1,%XMM4 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMINSD %XMM4,%XMM1,%XMM1 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDSD -0x90(%RBP),%XMM3,%XMM4 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD132SD %XMM4,%XMM11,%XMM4 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDSD -0x70(%RBP),%XMM2,%XMM11 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
CMOVA %R10,%RAX | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 1 | 1 |
MOV $0x3,%R10D | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VMULSD %XMM11,%XMM11,%XMM11 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231SD %XMM6,%XMM6,%XMM4 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDSD -0x68(%RBP),%XMM0,%XMM6 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VCOMISD %XMM4,%XMM1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMINSD %XMM1,%XMM4,%XMM4 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDSD -0x78(%RBP),%XMM3,%XMM1 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD132SD %XMM1,%XMM11,%XMM1 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDSD -0x58(%RBP),%XMM2,%XMM11 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
CMOVA %R10,%RAX | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 1 | 1 |
MOV $0x4,%R10D | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VMULSD %XMM11,%XMM11,%XMM11 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231SD %XMM6,%XMM6,%XMM1 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDSD -0x50(%RBP),%XMM0,%XMM6 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VCOMISD %XMM1,%XMM4 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMINSD %XMM4,%XMM1,%XMM1 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDSD -0x60(%RBP),%XMM3,%XMM4 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD132SD %XMM4,%XMM11,%XMM4 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDSD -0x40(%RBP),%XMM2,%XMM11 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
CMOVA %R10,%RAX | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 1 | 1 |
MOV $0x5,%R10D | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VMULSD %XMM11,%XMM11,%XMM11 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231SD %XMM6,%XMM6,%XMM4 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDSD -0x38(%RBP),%XMM0,%XMM6 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VCOMISD %XMM4,%XMM1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMINSD %XMM1,%XMM4,%XMM4 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDSD -0x48(%RBP),%XMM3,%XMM1 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD132SD %XMM1,%XMM11,%XMM1 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDSD -0xb0(%RBP),%XMM2,%XMM11 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
CMOVA %R10,%RAX | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 1 | 1 |
MOV $0x6,%R10D | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VMULSD %XMM11,%XMM11,%XMM11 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231SD %XMM6,%XMM6,%XMM1 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDSD -0xa8(%RBP),%XMM0,%XMM6 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VCOMISD %XMM1,%XMM4 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMINSD %XMM4,%XMM1,%XMM1 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDSD -0xb8(%RBP),%XMM3,%XMM4 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD132SD %XMM4,%XMM11,%XMM4 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
CMOVA %R10,%RAX | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 1 | 1 |
MOV $0x7,%R10D | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VFMADD231SD %XMM6,%XMM6,%XMM4 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VCOMISD %XMM4,%XMM1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMINSD %XMM4,%XMM1,%XMM1 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
CMOVA %R10,%RAX | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 1 | 1 |
VSQRTSD %XMM1,%XMM1,%XMM1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 13-19 | 4.50-6 |
LEA (%RBX,%RDX,8),%R10 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
INC %RCX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
LEA (%R9,%RAX,8),%RAX | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
VADDSD 0x180(%RAX),%XMM3,%XMM3 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDSD 0x1c0(%RAX),%XMM2,%XMM2 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVSD %XMM1,(%R10,%R13,1) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VADDSD 0x200(%RAX),%XMM0,%XMM0 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULSD %XMM5,%XMM3,%XMM4 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULSD %XMM5,%XMM2,%XMM6 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULSD %XMM5,%XMM0,%XMM5 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVSD %XMM4,(%R12,%RDX,8) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VMOVSD %XMM6,(%R12,%RDI,8) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VMOVSD %XMM5,(%R12,%RSI,8) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
CMP %R8,%RCX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JNE 498700 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
Function | qmcplusplus::DistanceTableBA |
Source file and lines | DistanceTableBA.h:99-101 |
Module | exec |
nb instructions | 129 |
nb uops | 137 |
loop length | 665 |
used x86 registers | 15 |
used mmx registers | 0 |
used xmm registers | 32 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 19 |
ADD-SUB / MUL ratio | 1.50 |
micro-operation queue | 34.25 cycles |
front end | 34.25 cycles |
Port | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 |
---|---|---|---|---|---|---|---|---|
uops | 50.50 | 50.00 | 13.00 | 13.00 | 4.00 | 13.50 | 15.00 | 4.00 |
cycles | 50.50 | 50.00 | 13.00 | 13.00 | 4.00 | 13.50 | 15.00 | 4.00 |
Cycles executing div or sqrt instructions | 4.50-6.00 |
Longest recurrence chain latency (RecMII) | 0.00 |
FE+BE cycles | 65.19-64.64 |
Stall cycles | 30.48-29.93 |
RS full (events) | 61.27-60.56 |
Front-end | 34.25 |
Dispatch | 50.50 |
DIV/SQRT | 4.50-6.00 |
Data deps. | 0.00 |
Overall L1 | 50.50 |
all | 0% |
load | NA (no load vectorizable/vectorized instructions) |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 0% |
all | 0% |
load | 3% |
store | 0% |
mul | 0% |
add-sub | 0% |
fma | 0% |
div/sqrt | 0% |
other | 5% |
all | 0% |
load | 3% |
store | 0% |
mul | 0% |
add-sub | 0% |
fma | 0% |
div/sqrt | 0% |
other | 5% |
all | 6% |
load | NA (no load vectorizable/vectorized instructions) |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 6% |
all | 12% |
load | 12% |
store | 12% |
mul | 12% |
add-sub | 12% |
fma | 12% |
div/sqrt | 12% |
other | 13% |
all | 12% |
load | 12% |
store | 12% |
mul | 12% |
add-sub | 12% |
fma | 12% |
div/sqrt | 12% |
other | 12% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|
MOVSXD %ECX,%RDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VMOVSD %XMM10,%XMM10,%XMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
TEST %ECX,%ECX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JNS 498717 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
LEA (%RDX,%R14,1),%RDI | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
VMOVSD (%R15,%RDX,8),%XMM2 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
LEA (%R11,%RDX,1),%RSI | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
VMOVSD (%R15,%RDI,8),%XMM4 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD (%R15,%RSI,8),%XMM11 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VSUBSD %XMM9,%XMM2,%XMM3 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VSUBSD %XMM8,%XMM4,%XMM6 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VSUBSD %XMM7,%XMM11,%XMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULSD %XMM5,%XMM3,%XMM3 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULSD %XMM5,%XMM6,%XMM2 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULSD %XMM5,%XMM0,%XMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULSD %XMM27,%XMM2,%XMM4 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULSD %XMM30,%XMM2,%XMM1 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231SD %XMM28,%XMM3,%XMM4 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231SD %XMM31,%XMM3,%XMM1 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231SD %XMM26,%XMM0,%XMM4 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231SD %XMM29,%XMM0,%XMM1 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VRNDSCALESD $0x9,%XMM4,%XMM4,%XMM6 | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 8 | 1 |
VMULSD %XMM24,%XMM2,%XMM4 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VRNDSCALESD $0x9,%XMM1,%XMM1,%XMM11 | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 8 | 1 |
VXORPD 0x36525(%RIP),%XMM6,%XMM1 | 1 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0.33 | 0 | 0 | 1 | 0.50 |
VMULSD %XMM21,%XMM1,%XMM6 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231SD %XMM25,%XMM3,%XMM4 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFNMADD231SD %XMM22,%XMM11,%XMM6 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231SD %XMM23,%XMM0,%XMM4 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VRNDSCALESD $0x9,%XMM4,%XMM4,%XMM4 | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 8 | 1 |
VFNMADD231SD %XMM20,%XMM4,%XMM3 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFNMADD231SD %XMM17,%XMM4,%XMM2 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFNMADD231SD %XMM14,%XMM4,%XMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDSD %XMM3,%XMM6,%XMM3 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULSD %XMM18,%XMM1,%XMM6 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULSD %XMM15,%XMM1,%XMM1 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFNMADD231SD %XMM19,%XMM11,%XMM6 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFNMADD231SD %XMM16,%XMM11,%XMM1 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDSD %XMM13,%XMM3,%XMM11 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDSD %XMM2,%XMM6,%XMM2 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDSD %XMM0,%XMM1,%XMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDSD %XMM12,%XMM2,%XMM6 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULSD %XMM2,%XMM2,%XMM4 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDSD -0x30(%RBP),%XMM0,%XMM1 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULSD %XMM6,%XMM6,%XMM6 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231SD %XMM3,%XMM3,%XMM4 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD132SD %XMM11,%XMM6,%XMM11 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDSD -0xc0(%RBP),%XMM0,%XMM6 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231SD %XMM0,%XMM0,%XMM4 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD132SD %XMM1,%XMM11,%XMM1 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDSD -0xa0(%RBP),%XMM2,%XMM11 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULSD %XMM11,%XMM11,%XMM11 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VCOMISD %XMM1,%XMM4 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMINSD %XMM1,%XMM4,%XMM4 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDSD -0x98(%RBP),%XMM3,%XMM1 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD132SD %XMM1,%XMM11,%XMM1 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDSD -0x88(%RBP),%XMM2,%XMM11 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
SETA %R10B | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 |
MOVZX %R10B,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV $0x2,%R10D | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VMULSD %XMM11,%XMM11,%XMM11 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231SD %XMM6,%XMM6,%XMM1 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDSD -0x80(%RBP),%XMM0,%XMM6 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VCOMISD %XMM1,%XMM4 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMINSD %XMM4,%XMM1,%XMM1 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDSD -0x90(%RBP),%XMM3,%XMM4 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD132SD %XMM4,%XMM11,%XMM4 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDSD -0x70(%RBP),%XMM2,%XMM11 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
CMOVA %R10,%RAX | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 1 | 1 |
MOV $0x3,%R10D | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VMULSD %XMM11,%XMM11,%XMM11 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231SD %XMM6,%XMM6,%XMM4 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDSD -0x68(%RBP),%XMM0,%XMM6 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VCOMISD %XMM4,%XMM1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMINSD %XMM1,%XMM4,%XMM4 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDSD -0x78(%RBP),%XMM3,%XMM1 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD132SD %XMM1,%XMM11,%XMM1 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDSD -0x58(%RBP),%XMM2,%XMM11 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
CMOVA %R10,%RAX | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 1 | 1 |
MOV $0x4,%R10D | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VMULSD %XMM11,%XMM11,%XMM11 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231SD %XMM6,%XMM6,%XMM1 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDSD -0x50(%RBP),%XMM0,%XMM6 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VCOMISD %XMM1,%XMM4 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMINSD %XMM4,%XMM1,%XMM1 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDSD -0x60(%RBP),%XMM3,%XMM4 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD132SD %XMM4,%XMM11,%XMM4 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDSD -0x40(%RBP),%XMM2,%XMM11 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
CMOVA %R10,%RAX | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 1 | 1 |
MOV $0x5,%R10D | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VMULSD %XMM11,%XMM11,%XMM11 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231SD %XMM6,%XMM6,%XMM4 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDSD -0x38(%RBP),%XMM0,%XMM6 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VCOMISD %XMM4,%XMM1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMINSD %XMM1,%XMM4,%XMM4 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDSD -0x48(%RBP),%XMM3,%XMM1 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD132SD %XMM1,%XMM11,%XMM1 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDSD -0xb0(%RBP),%XMM2,%XMM11 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
CMOVA %R10,%RAX | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 1 | 1 |
MOV $0x6,%R10D | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VMULSD %XMM11,%XMM11,%XMM11 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231SD %XMM6,%XMM6,%XMM1 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDSD -0xa8(%RBP),%XMM0,%XMM6 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VCOMISD %XMM1,%XMM4 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMINSD %XMM4,%XMM1,%XMM1 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDSD -0xb8(%RBP),%XMM3,%XMM4 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD132SD %XMM4,%XMM11,%XMM4 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
CMOVA %R10,%RAX | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 1 | 1 |
MOV $0x7,%R10D | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VFMADD231SD %XMM6,%XMM6,%XMM4 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VCOMISD %XMM4,%XMM1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMINSD %XMM4,%XMM1,%XMM1 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
CMOVA %R10,%RAX | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 1 | 1 |
VSQRTSD %XMM1,%XMM1,%XMM1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 13-19 | 4.50-6 |
LEA (%RBX,%RDX,8),%R10 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
INC %RCX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
LEA (%R9,%RAX,8),%RAX | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
VADDSD 0x180(%RAX),%XMM3,%XMM3 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDSD 0x1c0(%RAX),%XMM2,%XMM2 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVSD %XMM1,(%R10,%R13,1) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VADDSD 0x200(%RAX),%XMM0,%XMM0 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULSD %XMM5,%XMM3,%XMM4 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULSD %XMM5,%XMM2,%XMM6 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULSD %XMM5,%XMM0,%XMM5 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVSD %XMM4,(%R12,%RDX,8) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VMOVSD %XMM6,(%R12,%RDI,8) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VMOVSD %XMM5,(%R12,%RSI,8) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
CMP %R8,%RCX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JNE 498700 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |