Function: qmcplusplus::DistanceTableBA<double, 3u, 39>::evaluate(qmcplusplus::ParticleSet&) | Module: exec | Source: DistanceTableBA.h:66-105 [...] | Coverage: 0.15% |
---|
Function: qmcplusplus::DistanceTableBA<double, 3u, 39>::evaluate(qmcplusplus::ParticleSet&) | Module: exec | Source: DistanceTableBA.h:66-105 [...] | Coverage: 0.15% |
---|
/home/kcamus/qaas_runs/169-451-1869/intel/miniqmc/build/miniqmc/src/Particle/Lattice/ParticleBConds.h: 231 - 278 |
-------------------------------------------------------------------------------- |
231: const T y0 = pos[1]; |
232: const T z0 = pos[2]; |
233: |
234: const T* restrict px = R0; |
235: const T* restrict py = R0 + r0_stride; |
236: const T* restrict pz = R0 + r0_stride * 2; |
237: |
238: T* restrict dx = temp_dr; |
239: T* restrict dy = temp_dr + padded_size; |
240: T* restrict dz = temp_dr + padded_size * 2; |
[...] |
249: const T flip = iat < flip_ind ? one : minusone; |
250: const T displ_0 = (px[iat] - x0) * flip; |
251: const T displ_1 = (py[iat] - y0) * flip; |
252: const T displ_2 = (pz[iat] - z0) * flip; |
253: |
254: const T ar_0 = -std::floor(displ_0 * g00 + displ_1 * g10 + displ_2 * g20); |
255: const T ar_1 = -std::floor(displ_0 * g01 + displ_1 * g11 + displ_2 * g21); |
256: const T ar_2 = -std::floor(displ_0 * g02 + displ_1 * g12 + displ_2 * g22); |
257: |
258: const T delx = displ_0 + ar_0 * r00 + ar_1 * r10 + ar_2 * r20; |
259: const T dely = displ_1 + ar_0 * r01 + ar_1 * r11 + ar_2 * r21; |
[...] |
267: const T x = delx + cellx[c]; |
268: const T y = dely + celly[c]; |
269: const T z = delz + cellz[c]; |
270: const T r2 = x * x + y * y + z * z; |
271: ic = (r2 < rmin) ? c : ic; |
272: rmin = (r2 < rmin) ? r2 : rmin; |
273: } |
274: |
275: temp_r[iat] = std::sqrt(rmin); |
276: dx[iat] = flip * (delx + cellx[ic]); |
277: dy[iat] = flip * (dely + celly[ic]); |
278: dz[iat] = flip * (delz + cellz[ic]); |
/home/kcamus/qaas_runs/169-451-1869/intel/miniqmc/build/miniqmc/src/Particle/DistanceTableBA.h: 66 - 105 |
-------------------------------------------------------------------------------- |
66: { |
[...] |
73: const size_t ntgt_local = Ntargets; |
74: const size_t ntgt_padded = getAlignedSize<T>(Ntargets); |
75: const size_t nsrc_padded = getAlignedSize<T>(Nsources); |
76: |
77: auto* dist_ptr = Distances.data(); |
78: auto* displ_ptr = memoryPool.data(); |
79: auto* src_ptr = Origin->RSoA.data(); |
80: auto* tgt_ptr = P.RSoA.data(); |
81: |
82: const int ChunkSizePerTeam = 128; |
83: const size_t num_teams = (nsrc_padded + ChunkSizePerTeam - 1) / ChunkSizePerTeam; |
84: |
85: PRAGMA_OFFLOAD("omp target teams distribute collapse(2) num_teams(Ntargets * num_teams) \ |
86: map(to: src_ptr[:D*Origin->RSoA.capacity()], tgt_ptr[:D*P.RSoA.capacity()]) \ |
87: map(from: dist_ptr[:Distances.size()], displ_ptr[:memoryPool.size()])") |
88: for (size_t iat = 0; iat < ntgt_local; ++iat) |
89: for (size_t team_id = 0; team_id < num_teams; ++team_id) |
90: { |
91: T pos[D]; |
92: for (int idim = 0; idim < D; idim++) |
93: pos[idim] = *(tgt_ptr + ntgt_padded * idim + iat); |
[...] |
99: for (size_t jel = first; jel < last; ++jel) |
100: { |
101: DTD_BConds<T, D, SC>::computeDistancesOffload(pos, src_ptr, nsrc_padded, dist_ptr + nsrc_padded * iat, |
102: displ_ptr + nsrc_padded * D * iat, nsrc_padded, jel); |
103: } |
104: } |
105: } |
/home/kcamus/qaas_runs/169-451-1869/intel/miniqmc/build/miniqmc/src/Numerics/OhmmsPETE/OhmmsVector.h: 248 - 248 |
-------------------------------------------------------------------------------- |
248: inline pointer data() { return X; } |
/home/kcamus/qaas_runs/169-451-1869/intel/miniqmc/build/miniqmc/src/Platforms/CPU/SIMD/aligned_allocator.hpp: 46 - 46 |
-------------------------------------------------------------------------------- |
46: return ((n + ND - 1) / ND) * ND; |
/home/kcamus/qaas_runs/169-451-1869/intel/miniqmc/build/miniqmc/src/Numerics/OhmmsPETE/VectorSoAContainer.h: 237 - 239 |
-------------------------------------------------------------------------------- |
237: T* data() { return myData; } |
238: ///return the base |
239: const T* data() const { return myData; } |
/usr/lib64/gcc/x86_64-pc-linux-gnu/13.1.1/../../../../include/c++/13.1.1/bits/stl_vector.h: 1258 - 1258 |
-------------------------------------------------------------------------------- |
1258: { return _M_data_ptr(this->_M_impl._M_start); } |
0x45f850 PUSH %RBP |
0x45f851 MOV %RSP,%RBP |
0x45f854 PUSH %R15 |
0x45f856 PUSH %R14 |
0x45f858 PUSH %R13 |
0x45f85a PUSH %R12 |
0x45f85c PUSH %RBX |
0x45f85d AND $-0x20,%RSP |
0x45f861 SUB $0x1c0,%RSP |
0x45f868 MOVSXD 0xc8(%RDI),%R8 |
0x45f86f TEST %R8,%R8 |
0x45f872 JE 45f898 |
0x45f874 MOVSXD 0xc4(%RDI),%RAX |
0x45f87b ADD $0x7,%RAX |
0x45f87f MOV %RAX,%RCX |
0x45f882 AND $-0x8,%RCX |
0x45f886 MOV %RCX,0x10(%RSP) |
0x45f88b ADD $0x7f,%RCX |
0x45f88f CMP $0x80,%RCX |
0x45f896 JAE 45f8aa |
0x45f898 LEA -0x28(%RBP),%RSP |
0x45f89c POP %RBX |
0x45f89d POP %R12 |
0x45f89f POP %R13 |
0x45f8a1 POP %R14 |
0x45f8a3 POP %R15 |
0x45f8a5 POP %RBP |
0x45f8a6 VZEROUPPER |
0x45f8a9 RET |
0x45f8aa MOV %RCX,%RBX |
0x45f8ad LEA 0x7(%R8),%RCX |
0x45f8b1 SHR $0x7,%RBX |
0x45f8b5 MOV 0x8(%RDI),%R9 |
0x45f8b9 MOV 0x40(%RDI),%RDX |
0x45f8bd MOV %RDX,0x48(%RSP) |
0x45f8c2 MOV 0xa8(%RDI),%RDX |
0x45f8c9 MOV %RDX,0x40(%RSP) |
0x45f8ce MOV 0x610(%R9),%RDX |
0x45f8d5 MOV 0x610(%RSI),%R11 |
0x45f8dc MOV 0x10(%RSP),%R10 |
0x45f8e1 MOVSXD %R10D,%R9 |
0x45f8e4 LEA (%R10,%R10,1),%R14D |
0x45f8e8 LEA (%RDX,%R9,8),%RSI |
0x45f8ec MOV %RSI,0x60(%RSP) |
0x45f8f1 MOVSXD %R14D,%RSI |
0x45f8f4 MOV %RDX,0x70(%RSP) |
0x45f8f9 LEA (%RDX,%RSI,8),%R9 |
0x45f8fd CMP $0x2,%RBX |
0x45f901 MOV $0x1,%EDX |
0x45f906 CMOVB %RDX,%RBX |
0x45f90a CMP $0x2,%R8 |
0x45f90e CMOVAE %R8,%RDX |
0x45f912 DEC %RDX |
0x45f915 MOV %RDX,0x38(%RSP) |
0x45f91a SHR $0x3,%RAX |
0x45f91e LEA (%R10,%R10,2),%RDX |
0x45f922 MOV %RDX,0x30(%RSP) |
0x45f927 MOV %EAX,%ESI |
0x45f929 SAL $0x4,%ESI |
0x45f92c MOVSXD %ESI,%RDX |
0x45f92f MOV %RDX,0x28(%RSP) |
0x45f934 SAL $0x3,%EAX |
0x45f937 CLTQ |
0x45f939 MOV %RAX,0x20(%RSP) |
0x45f93e DEC %RBX |
0x45f941 MOV %RBX,0x78(%RSP) |
0x45f946 AND $-0x8,%RCX |
0x45f94a VMOVQ %RCX,%XMM0 |
0x45f94f VPSLLDQ $0x8,%XMM0,%XMM0 |
0x45f954 VMOVUPD %XMM0,0xd0(%RSP) |
0x45f95d MOV %R11,0x68(%RSP) |
0x45f962 VPBROADCASTQ %R11,%XMM0 |
0x45f968 VMOVDQU %XMM0,0xc0(%RSP) |
0x45f971 ADD %RCX,%RCX |
0x45f974 MOV %RCX,0x50(%RSP) |
0x45f979 XOR %EBX,%EBX |
0x45f97b JMP 45f994 |
0x45f97d NOPL (%RAX) |
(1171) 0x45f980 MOV 0x58(%RSP),%RCX |
(1171) 0x45f985 LEA 0x1(%RCX),%RBX |
(1171) 0x45f989 CMP 0x38(%RSP),%RCX |
(1171) 0x45f98e JE 45f898 |
(1171) 0x45f994 MOV 0x10(%RSP),%RAX |
(1171) 0x45f999 IMUL %RBX,%RAX |
(1171) 0x45f99d MOV 0x48(%RSP),%RCX |
(1171) 0x45f9a2 LEA (%RCX,%RAX,8),%R8 |
(1171) 0x45f9a6 MOV 0x30(%RSP),%RAX |
(1171) 0x45f9ab IMUL %RBX,%RAX |
(1171) 0x45f9af MOV 0x40(%RSP),%RCX |
(1171) 0x45f9b4 LEA (%RCX,%RAX,8),%R11 |
(1171) 0x45f9b8 VMOVUPD 0xf0(%RDI),%XMM8 |
(1171) 0x45f9c0 VMOVUPD 0xf8(%RDI),%XMM1 |
(1171) 0x45f9c8 VMOVUPD 0x100(%RDI),%XMM2 |
(1171) 0x45f9d0 VMOVSD 0x120(%RDI),%XMM20 |
(1171) 0x45f9d7 VMOVSD 0x128(%RDI),%XMM15 |
(1171) 0x45f9df VMOVSD 0x130(%RDI),%XMM12 |
(1171) 0x45f9e7 VMOVSD 0x138(%RDI),%XMM21 |
(1171) 0x45f9ee VMOVSD 0x140(%RDI),%XMM22 |
(1171) 0x45f9f5 VMOVSD 0x148(%RDI),%XMM23 |
(1171) 0x45f9fc VMOVUPD 0x150(%RDI),%XMM3 |
(1171) 0x45fa04 VMOVUPD 0x158(%RDI),%XMM4 |
(1171) 0x45fa0c VMOVUPD 0x160(%RDI),%XMM0 |
(1171) 0x45fa14 MOV 0x28(%RSP),%RDX |
(1171) 0x45fa19 LEA (%RAX,%RDX,1),%RSI |
(1171) 0x45fa1d LEA (%RCX,%RSI,8),%R15 |
(1171) 0x45fa21 ADD 0x20(%RSP),%RAX |
(1171) 0x45fa26 LEA (%RCX,%RAX,8),%RAX |
(1171) 0x45fa2a VPBROADCASTQ %RBX,%XMM6 |
(1171) 0x45fa30 VPADDQ 0xd0(%RSP),%XMM6,%XMM6 |
(1171) 0x45fa39 VPSLLQ $0x3,%XMM6,%XMM6 |
(1171) 0x45fa3e VPADDQ 0xc0(%RSP),%XMM6,%XMM6 |
(1171) 0x45fa47 VMOVDQU %XMM6,0xf0(%RSP) |
(1171) 0x45fa50 VMOVSD 0x188(%RDI),%XMM24 |
(1171) 0x45fa57 VMOVSD 0x1c8(%RDI),%XMM25 |
(1171) 0x45fa5e VMOVSD 0x208(%RDI),%XMM6 |
(1171) 0x45fa66 VMOVSD %XMM6,0xb8(%RSP) |
(1171) 0x45fa6f VMOVSD 0x190(%RDI),%XMM6 |
(1171) 0x45fa77 VMOVSD %XMM6,0xa0(%RSP) |
(1171) 0x45fa80 VMOVSD 0x1d0(%RDI),%XMM6 |
(1171) 0x45fa88 VMOVSD %XMM6,0x98(%RSP) |
(1171) 0x45fa91 VMOVSD 0x210(%RDI),%XMM6 |
(1171) 0x45fa99 VMOVSD %XMM6,0x90(%RSP) |
(1171) 0x45faa2 VMOVSD 0x198(%RDI),%XMM6 |
(1171) 0x45faaa VMOVSD %XMM6,0x88(%RSP) |
(1171) 0x45fab3 VMOVSD 0x218(%RDI),%XMM6 |
(1171) 0x45fabb VMOVUPS %XMM6,0xe0(%RSP) |
(1171) 0x45fac4 VMOVUPS 0x1a0(%RDI),%YMM6 |
(1171) 0x45facc VMOVUPS %YMM6,0x180(%RSP) |
(1171) 0x45fad5 VMOVUPS 0x1e0(%RDI),%YMM6 |
(1171) 0x45fadd VMOVUPS %YMM6,0x160(%RSP) |
(1171) 0x45fae6 VMOVUPD 0x220(%RDI),%YMM6 |
(1171) 0x45faee VMOVUPD %YMM6,0x140(%RSP) |
(1171) 0x45faf7 VMOVHPD 0x170(%RDI),%XMM3,%XMM9 |
(1171) 0x45faff VMOVHPD 0x110(%RDI),%XMM8,%XMM16 |
(1171) 0x45fb06 VMOVHPD 0x108(%RDI),%XMM1,%XMM10 |
(1171) 0x45fb0e VMOVHPD 0x118(%RDI),%XMM2,%XMM11 |
(1171) 0x45fb16 VMOVHPD 0x168(%RDI),%XMM4,%XMM13 |
(1171) 0x45fb1e MOV 0x50(%RSP),%RCX |
(1171) 0x45fb23 MOV %RBX,0x58(%RSP) |
(1171) 0x45fb28 ADD %RBX,%RCX |
(1171) 0x45fb2b MOV %RCX,0x80(%RSP) |
(1171) 0x45fb33 VMOVHPD 0x178(%RDI),%XMM0,%XMM14 |
(1171) 0x45fb3b MOV $0x80,%EDX |
(1171) 0x45fb40 XOR %ECX,%ECX |
(1171) 0x45fb42 MOV %RCX,0x18(%RSP) |
(1171) 0x45fb47 XOR %ECX,%ECX |
(1171) 0x45fb49 VMOVSD 0xa0(%RSP),%XMM27 |
(1171) 0x45fb51 VMOVSD 0x98(%RSP),%XMM17 |
(1171) 0x45fb59 VMOVSD 0x90(%RSP),%XMM18 |
(1171) 0x45fb61 VMOVSD 0x88(%RSP),%XMM19 |
(1171) 0x45fb69 VMOVUPD 0x180(%RSP),%YMM28 |
(1171) 0x45fb71 VMOVUPD 0x160(%RSP),%YMM29 |
(1171) 0x45fb79 VMOVUPD 0x140(%RSP),%YMM30 |
(1171) 0x45fb81 JMP 45fbb9 |
0x45fb83 NOPW %CS:(%RAX,%RAX,1) |
(1172) 0x45fb90 MOV 0xa8(%RSP),%RSI |
(1172) 0x45fb98 LEA 0x1(%RSI),%RCX |
(1172) 0x45fb9c MOV 0xb0(%RSP),%RDX |
(1172) 0x45fba4 SUB $-0x80,%RDX |
(1172) 0x45fba8 SUBQ $-0x80,0x18(%RSP) |
(1172) 0x45fbae CMP 0x78(%RSP),%RSI |
(1172) 0x45fbb3 JE 45f980 |
(1172) 0x45fbb9 MOV 0x10(%RSP),%RSI |
(1172) 0x45fbbe CMP %RSI,%RDX |
(1172) 0x45fbc1 MOV %RSI,%R13 |
(1172) 0x45fbc4 MOV %RDX,0xb0(%RSP) |
(1172) 0x45fbcc CMOVB %RDX,%R13 |
(1172) 0x45fbd0 MOV %RCX,0xa8(%RSP) |
(1172) 0x45fbd8 MOV %RCX,%RDX |
(1172) 0x45fbdb SAL $0x7,%RDX |
(1172) 0x45fbdf LEA 0x80(%RDX),%RCX |
(1172) 0x45fbe6 CMP %RSI,%RCX |
(1172) 0x45fbe9 VMOVDQU 0xf0(%RSP),%XMM0 |
(1172) 0x45fbf2 VMOVQ %XMM0,%RBX |
(1172) 0x45fbf7 VMOVSD (%RBX),%XMM26 |
(1172) 0x45fbfd VPEXTRQ $0x1,%XMM0,%RBX |
(1172) 0x45fc03 VMOVHPD (%RBX),%XMM26,%XMM0 |
(1172) 0x45fc09 VMOVUPD %XMM0,0x120(%RSP) |
(1172) 0x45fc12 CMOVAE %RSI,%RCX |
(1172) 0x45fc16 MOV 0x68(%RSP),%RSI |
(1172) 0x45fc1b MOV 0x80(%RSP),%RBX |
(1172) 0x45fc23 VMOVSD (%RSI,%RBX,8),%XMM0 |
(1172) 0x45fc28 VMOVSD %XMM0,0x130(%RSP) |
(1172) 0x45fc31 CMP %RCX,%RDX |
(1172) 0x45fc34 MOV 0x70(%RSP),%RDX |
(1172) 0x45fc39 MOV 0x60(%RSP),%RSI |
(1172) 0x45fc3e JAE 45fb90 |
(1172) 0x45fc44 VMOVUPS 0x128(%RSP),%XMM0 |
(1172) 0x45fc4d VMOVUPS %XMM0,0x110(%RSP) |
(1172) 0x45fc56 VMOVUPS 0x1d8(%RDI),%XMM0 |
(1172) 0x45fc5e VUNPCKLPD 0xe0(%RSP),%XMM0,%XMM0 |
(1172) 0x45fc67 VMOVUPS %XMM0,0x100(%RSP) |
(1172) 0x45fc70 MOV 0x18(%RSP),%R12 |
(1172) 0x45fc75 NOPW %CS:(%RAX,%RAX,1) |
(1173) 0x45fc80 XOR %ECX,%ECX |
(1173) 0x45fc82 TEST $-0x80000000,%R12D |
(1173) 0x45fc89 SETE %CL |
(1173) 0x45fc8c MOVSXD %R12D,%R14 |
(1173) 0x45fc8f VMOVSD (%RDX,%R14,8),%XMM0 |
(1173) 0x45fc95 VMOVDDUP 0x49c680(,%RCX,8),%XMM1 |
(1173) 0x45fc9e VSUBSD %XMM26,%XMM0,%XMM0 |
(1173) 0x45fca4 VMULSD %XMM1,%XMM0,%XMM0 |
(1173) 0x45fca8 VMOVSD (%RSI,%R14,8),%XMM2 |
(1173) 0x45fcae VMOVHPD (%R9,%R14,8),%XMM2,%XMM2 |
(1173) 0x45fcb4 VMULSD %XMM0,%XMM20,%XMM3 |
(1173) 0x45fcba VSUBPD 0x110(%RSP),%XMM2,%XMM2 |
(1173) 0x45fcc3 VMULPD %XMM1,%XMM2,%XMM2 |
(1173) 0x45fcc7 VUNPCKLPD %XMM0,%XMM2,%XMM4 |
(1173) 0x45fccb VMULPD %XMM4,%XMM10,%XMM4 |
(1173) 0x45fccf VUNPCKLPD %XMM2,%XMM0,%XMM6 |
(1173) 0x45fcd3 VFMADD213PD %XMM4,%XMM16,%XMM6 |
(1173) 0x45fcd9 VPERMILPD $0x3,%XMM2,%XMM4 |
(1173) 0x45fcdf VFMADD213PD %XMM6,%XMM11,%XMM4 |
(1173) 0x45fce4 VROUNDPD $0x9,%XMM4,%XMM4 |
(1173) 0x45fcea VFMADD231SD %XMM2,%XMM15,%XMM3 |
(1173) 0x45fcef VPERMILPD $0x1,%XMM2,%XMM6 |
(1173) 0x45fcf5 VFMADD213SD %XMM3,%XMM12,%XMM6 |
(1173) 0x45fcfa VROUNDSD $0x9,%XMM6,%XMM6,%XMM3 |
(1173) 0x45fd00 VPERMILPD $0x1,%XMM4,%XMM8 |
(1173) 0x45fd06 VMOVDDUP %XMM3,%XMM6 |
(1173) 0x45fd0a VFMSUB231SD %XMM21,%XMM4,%XMM0 |
(1173) 0x45fd10 VFMADD231SD %XMM22,%XMM8,%XMM0 |
(1173) 0x45fd16 VFNMSUB231SD %XMM3,%XMM23,%XMM0 |
(1173) 0x45fd1c VFMSUB213PD %XMM2,%XMM13,%XMM8 |
(1173) 0x45fd21 VFMADD231PD %XMM4,%XMM9,%XMM8 |
(1173) 0x45fd26 VFNMSUB231PD %XMM6,%XMM14,%XMM8 |
(1173) 0x45fd2b VADDSD %XMM0,%XMM24,%XMM2 |
(1173) 0x45fd31 VADDSD %XMM8,%XMM25,%XMM3 |
(1173) 0x45fd37 VPERMILPD $0x1,%XMM8,%XMM31 |
(1173) 0x45fd3e VADDSD 0xb8(%RSP),%XMM31,%XMM4 |
(1173) 0x45fd46 VBROADCASTSD %XMM0,%YMM6 |
(1173) 0x45fd4b VUNPCKLPD %XMM6,%XMM2,%XMM2 |
(1173) 0x45fd4f VMULPD %XMM2,%XMM2,%XMM2 |
(1173) 0x45fd53 VUNPCKLPD %XMM8,%XMM3,%XMM3 |
(1173) 0x45fd58 VFMADD213PD %XMM2,%XMM3,%XMM3 |
(1173) 0x45fd5d VBLENDPD $0x1,%XMM4,%XMM8,%XMM2 |
(1173) 0x45fd63 VFMADD213PD %XMM3,%XMM2,%XMM2 |
(1173) 0x45fd68 VPERMILPD $0x1,%XMM2,%XMM3 |
(1173) 0x45fd6e XOR %R10D,%R10D |
(1173) 0x45fd71 VUCOMISD %XMM3,%XMM2 |
(1173) 0x45fd75 SETB %R10B |
(1173) 0x45fd79 VMINSD %XMM3,%XMM2,%XMM2 |
(1173) 0x45fd7d VADDSD %XMM0,%XMM27,%XMM3 |
(1173) 0x45fd83 VADDSD %XMM8,%XMM17,%XMM4 |
(1173) 0x45fd89 VMOVAPD %XMM12,%XMM5 |
(1173) 0x45fd8d VADDSD %XMM31,%XMM18,%XMM12 |
(1173) 0x45fd93 VMULSD %XMM3,%XMM3,%XMM3 |
(1173) 0x45fd97 VFMADD231SD %XMM4,%XMM4,%XMM3 |
(1173) 0x45fd9c VFMADD231SD %XMM12,%XMM12,%XMM3 |
(1173) 0x45fda1 VMOVAPD %XMM5,%XMM12 |
(1173) 0x45fda5 VUCOMISD %XMM2,%XMM3 |
(1173) 0x45fda9 VMINSD %XMM2,%XMM3,%XMM2 |
(1173) 0x45fdad VADDPD 0x100(%RSP),%XMM8,%XMM3 |
(1173) 0x45fdb6 VMULPD %XMM3,%XMM3,%XMM3 |
(1173) 0x45fdba VPERMILPD $0x1,%XMM3,%XMM4 |
(1173) 0x45fdc0 VADDSD %XMM3,%XMM4,%XMM3 |
(1173) 0x45fdc4 VADDSD %XMM0,%XMM19,%XMM4 |
(1173) 0x45fdca VFMADD213SD %XMM3,%XMM4,%XMM4 |
(1173) 0x45fdcf MOV $0x2,%ECX |
(1173) 0x45fdd4 CMOVB %RCX,%R10 |
(1173) 0x45fdd8 VUCOMISD %XMM2,%XMM4 |
(1173) 0x45fddc VMINSD %XMM2,%XMM4,%XMM2 |
(1173) 0x45fde0 VADDPD %YMM6,%YMM28,%YMM3 |
(1173) 0x45fde6 VBROADCASTSD %XMM8,%YMM4 |
(1173) 0x45fdeb VADDPD %YMM4,%YMM29,%YMM4 |
(1173) 0x45fdf1 VPERMPD $0x55,%YMM8,%YMM6 |
(1173) 0x45fdf7 VADDPD %YMM6,%YMM30,%YMM6 |
(1173) 0x45fdfd VMULPD %YMM3,%YMM3,%YMM3 |
(1173) 0x45fe01 VFMADD231PD %YMM4,%YMM4,%YMM3 |
(1173) 0x45fe06 VFMADD231PD %YMM6,%YMM6,%YMM3 |
(1173) 0x45fe0b VMINSD %XMM2,%XMM3,%XMM4 |
(1173) 0x45fe0f VPERMILPD $0x1,%XMM3,%XMM6 |
(1173) 0x45fe15 VMINSD %XMM4,%XMM6,%XMM6 |
(1173) 0x45fe19 VEXTRACTF128 $0x1,%YMM3,%XMM7 |
(1173) 0x45fe1f VUNPCKLPD %XMM4,%XMM2,%XMM2 |
(1173) 0x45fe23 VMINSD %XMM6,%XMM7,%XMM4 |
(1173) 0x45fe27 VUNPCKLPD %XMM4,%XMM6,%XMM6 |
(1173) 0x45fe2b VINSERTF128 $0x1,%XMM6,%YMM2,%YMM2 |
(1173) 0x45fe31 VPERMILPD $0x1,%XMM7,%XMM6 |
(1173) 0x45fe37 VMINSD %XMM4,%XMM6,%XMM4 |
(1173) 0x45fe3b VSQRTSD %XMM4,%XMM4,%XMM4 |
(1173) 0x45fe3f VMOVSD %XMM4,(%R8,%R14,8) |
(1173) 0x45fe45 VCMPPD $0x1,%YMM2,%YMM3,%K0 |
(1173) 0x45fe4c KMOVD %K0,%ECX |
(1173) 0x45fe50 MOV $0x3,%EBX |
(1173) 0x45fe55 CMOVB %RBX,%R10 |
(1173) 0x45fe59 MOV $0x4,%EBX |
(1173) 0x45fe5e TEST $0x1,%CL |
(1173) 0x45fe61 CMOVNE %RBX,%R10 |
(1173) 0x45fe65 MOV $0x5,%EBX |
(1173) 0x45fe6a TEST $0x2,%CL |
(1173) 0x45fe6d CMOVNE %RBX,%R10 |
(1173) 0x45fe71 MOV $0x6,%EBX |
(1173) 0x45fe76 TEST $0x4,%CL |
(1173) 0x45fe79 CMOVNE %RBX,%R10 |
(1173) 0x45fe7d TEST $0x8,%CL |
(1173) 0x45fe80 MOV $0x7,%ECX |
(1173) 0x45fe85 CMOVNE %RCX,%R10 |
(1173) 0x45fe89 VADDSD 0x180(%RDI,%R10,8),%XMM0,%XMM0 |
(1173) 0x45fe93 VMULSD %XMM1,%XMM0,%XMM0 |
(1173) 0x45fe97 VMOVSD %XMM0,(%R11,%R14,8) |
(1173) 0x45fe9d VADDSD 0x1c0(%RDI,%R10,8),%XMM8,%XMM0 |
(1173) 0x45fea7 VMULSD %XMM1,%XMM0,%XMM0 |
(1173) 0x45feab VMOVSD %XMM0,(%RAX,%R14,8) |
(1173) 0x45feb1 VADDSD 0x200(%RDI,%R10,8),%XMM31,%XMM0 |
(1173) 0x45feb9 VMULSD %XMM1,%XMM0,%XMM0 |
(1173) 0x45febd VMOVSD %XMM0,(%R15,%R14,8) |
(1173) 0x45fec3 INC %R12 |
(1173) 0x45fec6 CMP %R12,%R13 |
(1173) 0x45fec9 JNE 45fc80 |
(1172) 0x45fecf JMP 45fb90 |
0x45fed4 NOPW %CS:(%RAX,%RAX,1) |
0x45fede XCHG %AX,%AX |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►100.00+ | qmcplusplus::ParticleSet::upda[...] | stl_vector.h:990 | exec |
○ | main.extracted.107 | miniqmc.cpp:390 | exec |
○ | __kmp_invoke_microtask | libiomp5.so | |
○ | __kmp_fork_call | libiomp5.so | |
○ | __kmpc_fork_call | libiomp5.so | |
○ | main | miniqmc.cpp:374 | exec |
○ | __libc_init_first | libc.so.6 |
Path / |
Source file and lines | DistanceTableBA.h:66-105 |
Module | exec |
nb instructions | 81 |
nb uops | 84 |
loop length | 329 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 1 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 15 |
micro-operation queue | 21.00 cycles |
front end | 21.00 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | |
---|---|---|---|---|---|---|---|---|
uops | 9.25 | 9.25 | 11.67 | 11.67 | 20.00 | 9.25 | 9.25 | 11.67 |
cycles | 9.25 | 9.25 | 11.67 | 11.67 | 20.00 | 9.25 | 9.25 | 11.67 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 20.41 |
Stall cycles | 0.00 |
Front-end | 21.00 |
Dispatch | 20.00 |
Overall L1 | 21.00 |
all | 10% |
load | 0% |
store | 7% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 15% |
all | 100% |
load | NA (no load vectorizable/vectorized instructions) |
store | 100% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | NA (no other vectorizable/vectorized instructions) |
all | 13% |
load | 0% |
store | 14% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 15% |
all | 13% |
load | 12% |
store | 13% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 13% |
all | 25% |
load | NA (no load vectorizable/vectorized instructions) |
store | 25% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | NA (no other vectorizable/vectorized instructions) |
all | 14% |
load | 12% |
store | 14% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 13% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
PUSH %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
PUSH %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
PUSH %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
PUSH %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
PUSH %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
AND $-0x20,%RSP | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
SUB $0x1c0,%RSP | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOVSXD 0xc8(%RDI),%R8 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
TEST %R8,%R8 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 45f898 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOVSXD 0xc4(%RDI),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
ADD $0x7,%RAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV %RAX,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
AND $-0x8,%RCX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV %RCX,0x10(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
ADD $0x7f,%RCX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
CMP $0x80,%RCX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JAE 45f8aa | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
POP %RBX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R12 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R13 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R14 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %RBP | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
VZEROUPPER | 4 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
RET | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 | 0 | 1 |
MOV %RCX,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
LEA 0x7(%R8),%RCX | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
SHR $0x7,%RBX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 |
MOV 0x8(%RDI),%R9 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x40(%RDI),%RDX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RDX,0x48(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0xa8(%RDI),%RDX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RDX,0x40(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0x610(%R9),%RDX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x610(%RSI),%R11 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x10(%RSP),%R10 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOVSXD %R10D,%R9 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
LEA (%R10,%R10,1),%R14D | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA (%RDX,%R9,8),%RSI | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RSI,0x60(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOVSXD %R14D,%RSI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV %RDX,0x70(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
LEA (%RDX,%RSI,8),%R9 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP $0x2,%RBX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV $0x1,%EDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
CMOVB %RDX,%RBX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 |
CMP $0x2,%R8 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
CMOVAE %R8,%RDX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 |
DEC %RDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV %RDX,0x38(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
SHR $0x3,%RAX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 |
LEA (%R10,%R10,2),%RDX | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RDX,0x30(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %EAX,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
SAL $0x4,%ESI | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 |
MOVSXD %ESI,%RDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV %RDX,0x28(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
SAL $0x3,%EAX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 |
CLTQ | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.50 |
MOV %RAX,0x20(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
DEC %RBX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV %RBX,0x78(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
AND $-0x8,%RCX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VMOVQ %RCX,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
VPSLLDQ $0x8,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
VMOVUPD %XMM0,0xd0(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %R11,0x68(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VPBROADCASTQ %R11,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
VMOVDQU %XMM0,0xc0(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 4 | 1 |
ADD %RCX,%RCX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV %RCX,0x50(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
XOR %EBX,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
JMP 45f994 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
XCHG %AX,%AX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
Source file and lines | DistanceTableBA.h:66-105 |
Module | exec |
nb instructions | 81 |
nb uops | 84 |
loop length | 329 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 1 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 15 |
micro-operation queue | 21.00 cycles |
front end | 21.00 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | |
---|---|---|---|---|---|---|---|---|
uops | 9.25 | 9.25 | 11.67 | 11.67 | 20.00 | 9.25 | 9.25 | 11.67 |
cycles | 9.25 | 9.25 | 11.67 | 11.67 | 20.00 | 9.25 | 9.25 | 11.67 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 20.41 |
Stall cycles | 0.00 |
Front-end | 21.00 |
Dispatch | 20.00 |
Overall L1 | 21.00 |
all | 10% |
load | 0% |
store | 7% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 15% |
all | 100% |
load | NA (no load vectorizable/vectorized instructions) |
store | 100% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | NA (no other vectorizable/vectorized instructions) |
all | 13% |
load | 0% |
store | 14% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 15% |
all | 13% |
load | 12% |
store | 13% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 13% |
all | 25% |
load | NA (no load vectorizable/vectorized instructions) |
store | 25% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | NA (no other vectorizable/vectorized instructions) |
all | 14% |
load | 12% |
store | 14% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 13% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
PUSH %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
PUSH %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
PUSH %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
PUSH %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
PUSH %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
AND $-0x20,%RSP | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
SUB $0x1c0,%RSP | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOVSXD 0xc8(%RDI),%R8 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
TEST %R8,%R8 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 45f898 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOVSXD 0xc4(%RDI),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
ADD $0x7,%RAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV %RAX,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
AND $-0x8,%RCX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV %RCX,0x10(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
ADD $0x7f,%RCX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
CMP $0x80,%RCX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JAE 45f8aa | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
POP %RBX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R12 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R13 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R14 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %RBP | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
VZEROUPPER | 4 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
RET | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 | 0 | 1 |
MOV %RCX,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
LEA 0x7(%R8),%RCX | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
SHR $0x7,%RBX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 |
MOV 0x8(%RDI),%R9 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x40(%RDI),%RDX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RDX,0x48(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0xa8(%RDI),%RDX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RDX,0x40(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0x610(%R9),%RDX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x610(%RSI),%R11 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x10(%RSP),%R10 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOVSXD %R10D,%R9 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
LEA (%R10,%R10,1),%R14D | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA (%RDX,%R9,8),%RSI | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RSI,0x60(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOVSXD %R14D,%RSI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV %RDX,0x70(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
LEA (%RDX,%RSI,8),%R9 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP $0x2,%RBX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV $0x1,%EDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
CMOVB %RDX,%RBX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 |
CMP $0x2,%R8 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
CMOVAE %R8,%RDX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 |
DEC %RDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV %RDX,0x38(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
SHR $0x3,%RAX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 |
LEA (%R10,%R10,2),%RDX | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RDX,0x30(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %EAX,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
SAL $0x4,%ESI | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 |
MOVSXD %ESI,%RDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV %RDX,0x28(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
SAL $0x3,%EAX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 |
CLTQ | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.50 |
MOV %RAX,0x20(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
DEC %RBX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV %RBX,0x78(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
AND $-0x8,%RCX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VMOVQ %RCX,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
VPSLLDQ $0x8,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
VMOVUPD %XMM0,0xd0(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %R11,0x68(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VPBROADCASTQ %R11,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
VMOVDQU %XMM0,0xc0(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 4 | 1 |
ADD %RCX,%RCX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV %RCX,0x50(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
XOR %EBX,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
JMP 45f994 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
XCHG %AX,%AX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼qmcplusplus::DistanceTableBA | 0.15 | 0.1 |
▼Loop 1171 - DistanceTableBA.h:88-102 - exec– | 0 | 0 |
▼Loop 1172 - DistanceTableBA.h:89-99 - exec– | 0 | 0 |
○Loop 1173 - DistanceTableBA.h:99-99 - exec | 0.15 | 0.1 |