Function: qmcplusplus::DistanceTableBA<double, 3u, 39>::evaluate(qmcplusplus::ParticleSet&) | Module: exec | Source: DistanceTableBA.h:66-105 [...] | Coverage: 0.17% |
---|
Function: qmcplusplus::DistanceTableBA<double, 3u, 39>::evaluate(qmcplusplus::ParticleSet&) | Module: exec | Source: DistanceTableBA.h:66-105 [...] | Coverage: 0.17% |
---|
/home/kcamus/qaas_runs/169-451-1869/intel/miniqmc/build/miniqmc/src/Particle/Lattice/ParticleBConds.h: 235 - 278 |
-------------------------------------------------------------------------------- |
235: const T* restrict py = R0 + r0_stride; |
236: const T* restrict pz = R0 + r0_stride * 2; |
237: |
238: T* restrict dx = temp_dr; |
239: T* restrict dy = temp_dr + padded_size; |
240: T* restrict dz = temp_dr + padded_size * 2; |
[...] |
249: const T flip = iat < flip_ind ? one : minusone; |
250: const T displ_0 = (px[iat] - x0) * flip; |
251: const T displ_1 = (py[iat] - y0) * flip; |
252: const T displ_2 = (pz[iat] - z0) * flip; |
253: |
254: const T ar_0 = -std::floor(displ_0 * g00 + displ_1 * g10 + displ_2 * g20); |
255: const T ar_1 = -std::floor(displ_0 * g01 + displ_1 * g11 + displ_2 * g21); |
256: const T ar_2 = -std::floor(displ_0 * g02 + displ_1 * g12 + displ_2 * g22); |
257: |
258: const T delx = displ_0 + ar_0 * r00 + ar_1 * r10 + ar_2 * r20; |
259: const T dely = displ_1 + ar_0 * r01 + ar_1 * r11 + ar_2 * r21; |
260: const T delz = displ_2 + ar_0 * r02 + ar_1 * r12 + ar_2 * r22; |
[...] |
267: const T x = delx + cellx[c]; |
268: const T y = dely + celly[c]; |
269: const T z = delz + cellz[c]; |
270: const T r2 = x * x + y * y + z * z; |
271: ic = (r2 < rmin) ? c : ic; |
272: rmin = (r2 < rmin) ? r2 : rmin; |
273: } |
274: |
275: temp_r[iat] = std::sqrt(rmin); |
276: dx[iat] = flip * (delx + cellx[ic]); |
277: dy[iat] = flip * (dely + celly[ic]); |
278: dz[iat] = flip * (delz + cellz[ic]); |
/home/kcamus/qaas_runs/169-451-1869/intel/miniqmc/build/miniqmc/src/Particle/DistanceTableBA.h: 66 - 105 |
-------------------------------------------------------------------------------- |
66: { |
[...] |
73: const size_t ntgt_local = Ntargets; |
74: const size_t ntgt_padded = getAlignedSize<T>(Ntargets); |
75: const size_t nsrc_padded = getAlignedSize<T>(Nsources); |
76: |
77: auto* dist_ptr = Distances.data(); |
78: auto* displ_ptr = memoryPool.data(); |
79: auto* src_ptr = Origin->RSoA.data(); |
80: auto* tgt_ptr = P.RSoA.data(); |
81: |
82: const int ChunkSizePerTeam = 128; |
83: const size_t num_teams = (nsrc_padded + ChunkSizePerTeam - 1) / ChunkSizePerTeam; |
84: |
85: PRAGMA_OFFLOAD("omp target teams distribute collapse(2) num_teams(Ntargets * num_teams) \ |
86: map(to: src_ptr[:D*Origin->RSoA.capacity()], tgt_ptr[:D*P.RSoA.capacity()]) \ |
87: map(from: dist_ptr[:Distances.size()], displ_ptr[:memoryPool.size()])") |
88: for (size_t iat = 0; iat < ntgt_local; ++iat) |
89: for (size_t team_id = 0; team_id < num_teams; ++team_id) |
90: { |
91: T pos[D]; |
92: for (int idim = 0; idim < D; idim++) |
93: pos[idim] = *(tgt_ptr + ntgt_padded * idim + iat); |
[...] |
99: for (size_t jel = first; jel < last; ++jel) |
100: { |
101: DTD_BConds<T, D, SC>::computeDistancesOffload(pos, src_ptr, nsrc_padded, dist_ptr + nsrc_padded * iat, |
102: displ_ptr + nsrc_padded * D * iat, nsrc_padded, jel); |
103: } |
104: } |
105: } |
/home/kcamus/qaas_runs/169-451-1869/intel/miniqmc/build/miniqmc/src/Numerics/OhmmsPETE/OhmmsVector.h: 248 - 248 |
-------------------------------------------------------------------------------- |
248: inline pointer data() { return X; } |
/home/kcamus/qaas_runs/169-451-1869/intel/miniqmc/build/miniqmc/src/Platforms/CPU/SIMD/aligned_allocator.hpp: 46 - 46 |
-------------------------------------------------------------------------------- |
46: return ((n + ND - 1) / ND) * ND; |
/home/kcamus/qaas_runs/169-451-1869/intel/miniqmc/build/miniqmc/src/Numerics/OhmmsPETE/VectorSoAContainer.h: 237 - 239 |
-------------------------------------------------------------------------------- |
237: T* data() { return myData; } |
238: ///return the base |
239: const T* data() const { return myData; } |
/usr/lib64/gcc/x86_64-pc-linux-gnu/13.1.1/../../../../include/c++/13.1.1/bits/stl_algobase.h: 238 - 238 |
-------------------------------------------------------------------------------- |
238: if (__b < __a) |
/usr/lib64/gcc/x86_64-pc-linux-gnu/13.1.1/../../../../include/c++/13.1.1/bits/stl_vector.h: 1258 - 1258 |
-------------------------------------------------------------------------------- |
1258: { return _M_data_ptr(this->_M_impl._M_start); } |
0x458ec0 PUSH %RBP |
0x458ec1 MOV %RSP,%RBP |
0x458ec4 PUSH %R15 |
0x458ec6 PUSH %R14 |
0x458ec8 PUSH %R13 |
0x458eca PUSH %R12 |
0x458ecc PUSH %RBX |
0x458ecd AND $-0x20,%RSP |
0x458ed1 SUB $0x1c0,%RSP |
0x458ed8 MOVSXD 0xc8(%RDI),%R8 |
0x458edf TEST %R8,%R8 |
0x458ee2 JE 45959a |
0x458ee8 MOVSXD 0xc4(%RDI),%RAX |
0x458eef ADD $0x7,%RAX |
0x458ef3 MOV %RAX,%RCX |
0x458ef6 AND $-0x8,%RCX |
0x458efa LEA 0x7f(%RCX),%RDX |
0x458efe SHR $0x7,%RDX |
0x458f02 CMP $0x2,%RDX |
0x458f06 MOV $0x1,%EBX |
0x458f0b CMOVB %RBX,%RDX |
0x458f0f MOV %RDX,0x28(%RSP) |
0x458f14 MOV %RCX,%RDX |
0x458f17 MOV %RCX,0x18(%RSP) |
0x458f1c DEC %RCX |
0x458f1f CMP $-0x80,%RCX |
0x458f23 JAE 45959a |
0x458f29 LEA 0x7(%R8),%RDX |
0x458f2d MOV 0x8(%RDI),%RCX |
0x458f31 MOV 0x40(%RDI),%RBX |
0x458f35 MOV %RBX,0x60(%RSP) |
0x458f3a MOV 0xa8(%RDI),%RBX |
0x458f41 MOV %RBX,0x58(%RSP) |
0x458f46 MOV 0x610(%RCX),%RBX |
0x458f4d MOV 0x610(%RSI),%RCX |
0x458f54 MOV %RCX,0x78(%RSP) |
0x458f59 MOVSXD 0x18(%RSP),%RCX |
0x458f5e LEA (%RBX,%RCX,8),%RSI |
0x458f62 MOV %RSI,0x90(%RSP) |
0x458f6a ADD %ECX,%ECX |
0x458f6c MOVSXD %ECX,%RCX |
0x458f6f LEA (%RBX,%RCX,8),%R9 |
0x458f73 SHR $0x3,%RAX |
0x458f77 CMP $0x2,%R8 |
0x458f7b MOV $0x1,%ECX |
0x458f80 CMOVAE %R8,%RCX |
0x458f84 DEC %RCX |
0x458f87 MOV %RCX,0x70(%RSP) |
0x458f8c LEA (,%RAX,8),%RSI |
0x458f94 LEA (%RSI,%RSI,2),%RCX |
0x458f98 MOV %RCX,0x50(%RSP) |
0x458f9d MOV %EAX,%ESI |
0x458f9f SAL $0x4,%ESI |
0x458fa2 MOVSXD %ESI,%RCX |
0x458fa5 MOV %RCX,0x48(%RSP) |
0x458faa SAL $0x3,%EAX |
0x458fad CLTQ |
0x458faf MOV %RAX,0x40(%RSP) |
0x458fb4 DECQ 0x28(%RSP) |
0x458fb9 LEA (%RDX,%RDX,1),%RAX |
0x458fbd AND $-0x10,%RAX |
0x458fc1 MOV %RAX,0x38(%RSP) |
0x458fc6 AND $-0x8,%RDX |
0x458fca MOV %RDX,0x68(%RSP) |
0x458fcf XOR %R8D,%R8D |
0x458fd2 JMP 458ff4 |
0x458fd4 NOPW %CS:(%RAX,%RAX,1) |
(975) 0x458fe0 MOV 0x20(%RSP),%RCX |
(975) 0x458fe5 LEA 0x1(%RCX),%R8 |
(975) 0x458fe9 CMP 0x70(%RSP),%RCX |
(975) 0x458fee JE 45959a |
(975) 0x458ff4 MOV 0x18(%RSP),%RAX |
(975) 0x458ff9 IMUL %R8,%RAX |
(975) 0x458ffd MOV 0x60(%RSP),%RCX |
(975) 0x459002 LEA (%RCX,%RAX,8),%R15 |
(975) 0x459006 MOV 0x50(%RSP),%RAX |
(975) 0x45900b IMUL %R8,%RAX |
(975) 0x45900f VMOVUPD 0xf0(%RDI),%XMM0 |
(975) 0x459017 VMOVUPD 0xf8(%RDI),%XMM1 |
(975) 0x45901f VMOVUPD 0x100(%RDI),%XMM2 |
(975) 0x459027 VMOVSD 0x120(%RDI),%XMM27 |
(975) 0x45902e VMOVSD 0x128(%RDI),%XMM3 |
(975) 0x459036 VMOVSD %XMM3,0xf0(%RSP) |
(975) 0x45903f VMOVSD 0x130(%RDI),%XMM3 |
(975) 0x459047 VMOVSD %XMM3,0xe8(%RSP) |
(975) 0x459050 VMOVUPS 0x150(%RDI),%XMM3 |
(975) 0x459058 VMOVUPD 0x158(%RDI),%XMM4 |
(975) 0x459060 VMOVUPD 0x160(%RDI),%XMM5 |
(975) 0x459068 VMOVSD 0x168(%RDI),%XMM6 |
(975) 0x459070 VMOVSD %XMM6,0xe0(%RSP) |
(975) 0x459079 MOV 0x58(%RSP),%RCX |
(975) 0x45907e LEA (%RCX,%RAX,8),%R14 |
(975) 0x459082 VMOVSD 0x170(%RDI),%XMM6 |
(975) 0x45908a VMOVSD %XMM6,0xd8(%RSP) |
(975) 0x459093 VMOVSD 0x178(%RDI),%XMM6 |
(975) 0x45909b VMOVSD %XMM6,0xd0(%RSP) |
(975) 0x4590a4 MOV 0x48(%RSP),%RDX |
(975) 0x4590a9 LEA (%RAX,%RDX,1),%RSI |
(975) 0x4590ad LEA (%RCX,%RSI,8),%R12 |
(975) 0x4590b1 ADD 0x40(%RSP),%RAX |
(975) 0x4590b6 LEA (%RCX,%RAX,8),%RAX |
(975) 0x4590ba MOV 0x38(%RSP),%RCX |
(975) 0x4590bf ADD %R8,%RCX |
(975) 0x4590c2 MOV %RCX,0x88(%RSP) |
(975) 0x4590ca VMOVSD 0x188(%RDI),%XMM6 |
(975) 0x4590d2 VMOVSD %XMM6,0xc8(%RSP) |
(975) 0x4590db VMOVSD 0x1c8(%RDI),%XMM6 |
(975) 0x4590e3 VMOVSD %XMM6,0xc0(%RSP) |
(975) 0x4590ec VMOVSD 0x208(%RDI),%XMM6 |
(975) 0x4590f4 VMOVSD %XMM6,0xb8(%RSP) |
(975) 0x4590fd VMOVSD 0x190(%RDI),%XMM6 |
(975) 0x459105 VMOVSD %XMM6,0xb0(%RSP) |
(975) 0x45910e VMOVSD 0x1d0(%RDI),%XMM6 |
(975) 0x459116 VMOVSD %XMM6,0xa8(%RSP) |
(975) 0x45911f VMOVSD 0x210(%RDI),%XMM6 |
(975) 0x459127 VMOVSD %XMM6,0xa0(%RSP) |
(975) 0x459130 VMOVSD 0x198(%RDI),%XMM6 |
(975) 0x459138 VMOVUPS %XMM6,0x110(%RSP) |
(975) 0x459141 VMOVSD 0x218(%RDI),%XMM6 |
(975) 0x459149 VMOVSD %XMM6,0x98(%RSP) |
(975) 0x459152 VMOVUPS 0x1a0(%RDI),%YMM6 |
(975) 0x45915a VMOVUPS %YMM6,0x180(%RSP) |
(975) 0x459163 VMOVUPS 0x1e0(%RDI),%YMM6 |
(975) 0x45916b VMOVUPS %YMM6,0x160(%RSP) |
(975) 0x459174 VMOVUPD 0x220(%RDI),%YMM6 |
(975) 0x45917c VMOVUPD %YMM6,0x140(%RSP) |
(975) 0x459185 VMOVHPS 0x140(%RDI),%XMM3,%XMM3 |
(975) 0x45918d VMOVUPS %XMM3,0x120(%RSP) |
(975) 0x459196 VMOVHPD 0x110(%RDI),%XMM0,%XMM3 |
(975) 0x45919e VMOVHPD 0x108(%RDI),%XMM1,%XMM30 |
(975) 0x4591a5 VMOVHPD 0x118(%RDI),%XMM2,%XMM31 |
(975) 0x4591ac VMOVHPD 0x138(%RDI),%XMM4,%XMM8 |
(975) 0x4591b4 MOV 0x68(%RSP),%RCX |
(975) 0x4591b9 MOV %R8,0x20(%RSP) |
(975) 0x4591be ADD %R8,%RCX |
(975) 0x4591c1 MOV %RCX,0x80(%RSP) |
(975) 0x4591c9 VMOVHPD 0x148(%RDI),%XMM5,%XMM9 |
(975) 0x4591d1 MOV $0x80,%EDX |
(975) 0x4591d6 MOVQ $0,0x30(%RSP) |
(975) 0x4591df XOR %ECX,%ECX |
(975) 0x4591e1 VMOVSD 0xf0(%RSP),%XMM11 |
(975) 0x4591ea VMOVSD 0xe8(%RSP),%XMM13 |
(975) 0x4591f3 VMOVSD 0xe0(%RSP),%XMM17 |
(975) 0x4591fb VMOVSD 0xd8(%RSP),%XMM18 |
(975) 0x459203 VMOVSD 0xd0(%RSP),%XMM19 |
(975) 0x45920b VMOVSD 0xc8(%RSP),%XMM20 |
(975) 0x459213 VMOVSD 0xc0(%RSP),%XMM21 |
(975) 0x45921b VMOVSD 0xb8(%RSP),%XMM22 |
(975) 0x459223 VMOVSD 0xb0(%RSP),%XMM23 |
(975) 0x45922b VMOVSD 0xa8(%RSP),%XMM24 |
(975) 0x459233 VMOVSD 0xa0(%RSP),%XMM14 |
(975) 0x45923c VMOVSD 0x98(%RSP),%XMM15 |
(975) 0x459245 VMOVUPD 0x180(%RSP),%YMM16 |
(975) 0x45924d VMOVUPD 0x160(%RSP),%YMM25 |
(975) 0x459255 VMOVUPD 0x140(%RSP),%YMM26 |
(975) 0x45925d VMOVUPD 0x120(%RSP),%XMM28 |
(975) 0x459265 JMP 459299 |
0x459267 NOPW (%RAX,%RAX,1) |
(976) 0x459270 MOV 0xf8(%RSP),%RSI |
(976) 0x459278 LEA 0x1(%RSI),%RCX |
(976) 0x45927c SUBQ $-0x80,0x30(%RSP) |
(976) 0x459282 MOV 0x100(%RSP),%RDX |
(976) 0x45928a SUB $-0x80,%RDX |
(976) 0x45928e CMP 0x28(%RSP),%RSI |
(976) 0x459293 JE 458fe0 |
(976) 0x459299 MOV 0x18(%RSP),%RSI |
(976) 0x45929e CMP %RSI,%RDX |
(976) 0x4592a1 MOV %RSI,%R13 |
(976) 0x4592a4 MOV %RDX,0x100(%RSP) |
(976) 0x4592ac CMOVB %RDX,%R13 |
(976) 0x4592b0 MOV %RCX,0xf8(%RSP) |
(976) 0x4592b8 MOV %RCX,%RDX |
(976) 0x4592bb SAL $0x7,%RDX |
(976) 0x4592bf MOV %RBX,%R8 |
(976) 0x4592c2 LEA 0x80(%RDX),%RBX |
(976) 0x4592c9 XOR %ECX,%ECX |
(976) 0x4592cb CMP %RBX,%RSI |
(976) 0x4592ce SETAE %CL |
(976) 0x4592d1 XOR %EBX,%EBX |
(976) 0x4592d3 CMP %RDX,%RSI |
(976) 0x4592d6 SETA %BL |
(976) 0x4592d9 CMP %CL,%BL |
(976) 0x4592db CMOVA %EBX,%ECX |
(976) 0x4592de MOV %R8,%RBX |
(976) 0x4592e1 CMP $0x1,%CL |
(976) 0x4592e4 MOV 0x90(%RSP),%RDX |
(976) 0x4592ec JNE 459270 |
(976) 0x4592ee MOV 0x78(%RSP),%RCX |
(976) 0x4592f3 MOV 0x88(%RSP),%RSI |
(976) 0x4592fb VMOVSD (%RCX,%RSI,8),%XMM0 |
(976) 0x459300 VMOVSD %XMM0,0x108(%RSP) |
(976) 0x459309 VMOVUPS 0x1d8(%RDI),%XMM0 |
(976) 0x459311 VUNPCKLPD 0x110(%RSP),%XMM0,%XMM0 |
(976) 0x45931a VMOVUPS %XMM0,0x130(%RSP) |
(976) 0x459323 MOV 0x80(%RSP),%RSI |
(976) 0x45932b VMOVSD (%RCX,%RSI,8),%XMM0 |
(976) 0x459330 MOV 0x20(%RSP),%RSI |
(976) 0x459335 VMOVHPD (%RCX,%RSI,8),%XMM0,%XMM10 |
(976) 0x45933a MOV 0x30(%RSP),%R8 |
(976) 0x45933f NOP |
(977) 0x459340 XOR %ECX,%ECX |
(977) 0x459342 TEST $-0x80000000,%R8D |
(977) 0x459349 SETE %CL |
(977) 0x45934c MOVSXD %R8D,%R10 |
(977) 0x45934f VMOVSD (%R9,%R10,8),%XMM0 |
(977) 0x459355 VSUBSD 0x108(%RSP),%XMM0,%XMM0 |
(977) 0x45935e VMOVDDUP 0x496300(,%RCX,8),%XMM6 |
(977) 0x459367 VMULSD %XMM6,%XMM0,%XMM0 |
(977) 0x45936b VMOVSD (%RDX,%R10,8),%XMM7 |
(977) 0x459371 VMOVHPD (%RBX,%R10,8),%XMM7,%XMM7 |
(977) 0x459377 VSUBPD %XMM10,%XMM7,%XMM7 |
(977) 0x45937c VMULPD %XMM6,%XMM7,%XMM12 |
(977) 0x459380 VMULPD %XMM12,%XMM30,%XMM7 |
(977) 0x459386 VPERMILPD $0x1,%XMM12,%XMM1 |
(977) 0x45938c VFMADD231PD %XMM1,%XMM3,%XMM7 |
(977) 0x459391 VMOVDDUP %XMM0,%XMM2 |
(977) 0x459395 VFMADD213PD %XMM7,%XMM31,%XMM2 |
(977) 0x45939b VROUNDPD $0x9,%XMM2,%XMM7 |
(977) 0x4593a1 VMULSD %XMM1,%XMM27,%XMM1 |
(977) 0x4593a7 VFMADD231SD %XMM12,%XMM11,%XMM1 |
(977) 0x4593ac VFMADD231SD %XMM0,%XMM13,%XMM1 |
(977) 0x4593b1 VROUNDSD $0x9,%XMM1,%XMM1,%XMM1 |
(977) 0x4593b7 VPERMILPD $0x1,%XMM7,%XMM2 |
(977) 0x4593bd VMOVAPD %XMM3,%XMM29 |
(977) 0x4593c3 VMOVAPD %XMM27,%XMM3 |
(977) 0x4593c9 VMOVDDUP %XMM1,%XMM27 |
(977) 0x4593cf VFMSUB231PD %XMM8,%XMM2,%XMM12 |
(977) 0x4593d4 VFMADD231PD %XMM28,%XMM7,%XMM12 |
(977) 0x4593da VFNMSUB231PD %XMM27,%XMM9,%XMM12 |
(977) 0x4593e0 VMOVAPD %XMM3,%XMM27 |
(977) 0x4593e6 VFMSUB213SD %XMM0,%XMM17,%XMM7 |
(977) 0x4593ec VFMADD231SD %XMM2,%XMM18,%XMM7 |
(977) 0x4593f2 VFNMSUB231SD %XMM1,%XMM19,%XMM7 |
(977) 0x4593f8 VPERMILPD $0x1,%XMM12,%XMM0 |
(977) 0x4593fe VADDSD %XMM0,%XMM20,%XMM1 |
(977) 0x459404 VADDSD %XMM12,%XMM21,%XMM2 |
(977) 0x45940a VADDSD %XMM7,%XMM22,%XMM3 |
(977) 0x459410 VBLENDPD $0x1,%XMM1,%XMM12,%XMM1 |
(977) 0x459416 VMULPD %XMM1,%XMM1,%XMM1 |
(977) 0x45941a VUNPCKLPD %XMM12,%XMM2,%XMM2 |
(977) 0x45941f VFMADD213PD %XMM1,%XMM2,%XMM2 |
(977) 0x459424 VBROADCASTSD %XMM7,%YMM1 |
(977) 0x459429 VUNPCKLPD %XMM1,%XMM3,%XMM3 |
(977) 0x45942d VFMADD213PD %XMM2,%XMM3,%XMM3 |
(977) 0x459432 VPERMILPD $0x1,%XMM3,%XMM2 |
(977) 0x459438 XOR %R11D,%R11D |
(977) 0x45943b VUCOMISD %XMM2,%XMM3 |
(977) 0x45943f SETB %R11B |
(977) 0x459443 VMINSD %XMM2,%XMM3,%XMM2 |
(977) 0x459447 VADDSD %XMM0,%XMM23,%XMM3 |
(977) 0x45944d VADDSD %XMM12,%XMM24,%XMM4 |
(977) 0x459453 VADDSD %XMM7,%XMM14,%XMM5 |
(977) 0x459457 VMULSD %XMM3,%XMM3,%XMM3 |
(977) 0x45945b VFMADD231SD %XMM4,%XMM4,%XMM3 |
(977) 0x459460 VFMADD231SD %XMM5,%XMM5,%XMM3 |
(977) 0x459465 VUCOMISD %XMM2,%XMM3 |
(977) 0x459469 VMINSD %XMM2,%XMM3,%XMM2 |
(977) 0x45946d VADDPD 0x130(%RSP),%XMM12,%XMM3 |
(977) 0x459476 VADDSD %XMM7,%XMM15,%XMM4 |
(977) 0x45947a VMULPD %XMM3,%XMM3,%XMM3 |
(977) 0x45947e VPERMILPD $0x1,%XMM3,%XMM5 |
(977) 0x459484 VADDSD %XMM5,%XMM3,%XMM3 |
(977) 0x459488 VFMADD231SD %XMM4,%XMM4,%XMM3 |
(977) 0x45948d MOV $0x2,%ECX |
(977) 0x459492 CMOVB %RCX,%R11 |
(977) 0x459496 VUCOMISD %XMM2,%XMM3 |
(977) 0x45949a VMINSD %XMM2,%XMM3,%XMM2 |
(977) 0x45949e VPERMPD $0x55,%YMM12,%YMM3 |
(977) 0x4594a4 VADDPD %YMM3,%YMM16,%YMM3 |
(977) 0x4594aa VBROADCASTSD %XMM12,%YMM4 |
(977) 0x4594af VADDPD %YMM4,%YMM25,%YMM4 |
(977) 0x4594b5 VADDPD %YMM1,%YMM26,%YMM1 |
(977) 0x4594bb VMULPD %YMM3,%YMM3,%YMM3 |
(977) 0x4594bf VFMADD231PD %YMM4,%YMM4,%YMM3 |
(977) 0x4594c4 VFMADD231PD %YMM1,%YMM1,%YMM3 |
(977) 0x4594c9 VMINSD %XMM2,%XMM3,%XMM1 |
(977) 0x4594cd VPERMILPD $0x1,%XMM3,%XMM4 |
(977) 0x4594d3 VMINSD %XMM1,%XMM4,%XMM4 |
(977) 0x4594d7 VEXTRACTF128 $0x1,%YMM3,%XMM5 |
(977) 0x4594dd VUNPCKLPD %XMM1,%XMM2,%XMM1 |
(977) 0x4594e1 VMINSD %XMM4,%XMM5,%XMM2 |
(977) 0x4594e5 VUNPCKLPD %XMM2,%XMM4,%XMM4 |
(977) 0x4594e9 VINSERTF128 $0x1,%XMM4,%YMM1,%YMM1 |
(977) 0x4594ef VPERMILPD $0x1,%XMM5,%XMM4 |
(977) 0x4594f5 VMINSD %XMM2,%XMM4,%XMM2 |
(977) 0x4594f9 VSQRTSD %XMM2,%XMM2,%XMM2 |
(977) 0x4594fd VMOVSD %XMM2,(%R15,%R10,8) |
(977) 0x459503 VCMPPD $0x1,%YMM1,%YMM3,%K0 |
(977) 0x45950a VMOVAPD %XMM29,%XMM3 |
(977) 0x459510 KMOVD %K0,%ECX |
(977) 0x459514 MOV $0x3,%ESI |
(977) 0x459519 CMOVB %RSI,%R11 |
(977) 0x45951d MOV $0x4,%ESI |
(977) 0x459522 TEST $0x1,%CL |
(977) 0x459525 CMOVNE %RSI,%R11 |
(977) 0x459529 MOV $0x5,%ESI |
(977) 0x45952e TEST $0x2,%CL |
(977) 0x459531 CMOVNE %RSI,%R11 |
(977) 0x459535 MOV $0x6,%ESI |
(977) 0x45953a TEST $0x4,%CL |
(977) 0x45953d CMOVNE %RSI,%R11 |
(977) 0x459541 TEST $0x8,%CL |
(977) 0x459544 MOV $0x7,%ECX |
(977) 0x459549 CMOVNE %RCX,%R11 |
(977) 0x45954d VADDSD 0x180(%RDI,%R11,8),%XMM0,%XMM0 |
(977) 0x459557 VMULSD %XMM6,%XMM0,%XMM0 |
(977) 0x45955b VMOVSD %XMM0,(%R14,%R10,8) |
(977) 0x459561 VADDSD 0x1c0(%RDI,%R11,8),%XMM12,%XMM0 |
(977) 0x45956b VMULSD %XMM6,%XMM0,%XMM0 |
(977) 0x45956f VMOVSD %XMM0,(%RAX,%R10,8) |
(977) 0x459575 VADDSD 0x200(%RDI,%R11,8),%XMM7,%XMM0 |
(977) 0x45957f VMULSD %XMM6,%XMM0,%XMM0 |
(977) 0x459583 VMOVSD %XMM0,(%R12,%R10,8) |
(977) 0x459589 INC %R8 |
(977) 0x45958c CMP %R8,%R13 |
(977) 0x45958f JNE 459340 |
(976) 0x459595 JMP 459270 |
0x45959a LEA -0x28(%RBP),%RSP |
0x45959e POP %RBX |
0x45959f POP %R12 |
0x4595a1 POP %R13 |
0x4595a3 POP %R14 |
0x4595a5 POP %R15 |
0x4595a7 POP %RBP |
0x4595a8 VZEROUPPER |
0x4595ab RET |
0x4595ac NOPL (%RAX) |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►100.00+ | qmcplusplus::ParticleSet::upda[...] | ParticleSet.cpp:250 | exec |
○ | main.extracted.107 | miniqmc.cpp:390 | exec |
○ | __kmp_invoke_microtask | libiomp5.so | |
○ | __kmp_fork_call | libiomp5.so | |
○ | __kmpc_fork_call | libiomp5.so | |
○ | main | miniqmc.cpp:374 | exec |
○ | __libc_init_first | libc.so.6 |
Path / |
Source file and lines | DistanceTableBA.h:66-105 |
Module | exec |
nb instructions | 78 |
nb uops | 83 |
loop length | 319 |
used x86 registers | 14 |
used mmx registers | 0 |
used xmm registers | 0 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 13 |
micro-operation queue | 20.75 cycles |
front end | 20.75 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | |
---|---|---|---|---|---|---|---|---|
uops | 9.25 | 9.25 | 11.83 | 11.50 | 19.00 | 9.25 | 9.25 | 11.67 |
cycles | 9.25 | 9.25 | 11.83 | 11.50 | 19.00 | 9.25 | 9.25 | 11.67 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 19.91 |
Stall cycles | 0.00 |
Front-end | 20.75 |
Dispatch | 19.00 |
Overall L1 | 20.75 |
all | 5% |
load | NA (no load vectorizable/vectorized instructions) |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 16% |
all | 13% |
load | NA (no load vectorizable/vectorized instructions) |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 14% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
PUSH %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
PUSH %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
PUSH %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
PUSH %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
PUSH %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
AND $-0x20,%RSP | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
SUB $0x1c0,%RSP | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOVSXD 0xc8(%RDI),%R8 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
TEST %R8,%R8 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 45959a | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOVSXD 0xc4(%RDI),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
ADD $0x7,%RAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV %RAX,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
AND $-0x8,%RCX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
LEA 0x7f(%RCX),%RDX | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
SHR $0x7,%RDX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 |
CMP $0x2,%RDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV $0x1,%EBX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
CMOVB %RBX,%RDX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 |
MOV %RDX,0x28(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %RCX,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV %RCX,0x18(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
DEC %RCX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
CMP $-0x80,%RCX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JAE 45959a | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
LEA 0x7(%R8),%RDX | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x8(%RDI),%RCX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x40(%RDI),%RBX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RBX,0x60(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0xa8(%RDI),%RBX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RBX,0x58(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0x610(%RCX),%RBX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x610(%RSI),%RCX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RCX,0x78(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOVSXD 0x18(%RSP),%RCX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
LEA (%RBX,%RCX,8),%RSI | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RSI,0x90(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
ADD %ECX,%ECX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOVSXD %ECX,%RCX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
LEA (%RBX,%RCX,8),%R9 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
SHR $0x3,%RAX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 |
CMP $0x2,%R8 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV $0x1,%ECX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
CMOVAE %R8,%RCX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 |
DEC %RCX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV %RCX,0x70(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
LEA (,%RAX,8),%RSI | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA (%RSI,%RSI,2),%RCX | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RCX,0x50(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %EAX,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
SAL $0x4,%ESI | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 |
MOVSXD %ESI,%RCX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV %RCX,0x48(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
SAL $0x3,%EAX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 |
CLTQ | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.50 |
MOV %RAX,0x40(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
DECQ 0x28(%RSP) | 3 | 0.25 | 0.25 | 0.83 | 0.83 | 1 | 0.25 | 0.25 | 0.33 | 5 | 1 |
LEA (%RDX,%RDX,1),%RAX | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
AND $-0x10,%RAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV %RAX,0x38(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
AND $-0x8,%RDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV %RDX,0x68(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
XOR %R8D,%R8D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
JMP 458ff4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
POP %RBX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R12 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R13 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R14 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %RBP | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
VZEROUPPER | 4 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
RET | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 | 0 | 1 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
Source file and lines | DistanceTableBA.h:66-105 |
Module | exec |
nb instructions | 78 |
nb uops | 83 |
loop length | 319 |
used x86 registers | 14 |
used mmx registers | 0 |
used xmm registers | 0 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 13 |
micro-operation queue | 20.75 cycles |
front end | 20.75 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | |
---|---|---|---|---|---|---|---|---|
uops | 9.25 | 9.25 | 11.83 | 11.50 | 19.00 | 9.25 | 9.25 | 11.67 |
cycles | 9.25 | 9.25 | 11.83 | 11.50 | 19.00 | 9.25 | 9.25 | 11.67 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 19.91 |
Stall cycles | 0.00 |
Front-end | 20.75 |
Dispatch | 19.00 |
Overall L1 | 20.75 |
all | 5% |
load | NA (no load vectorizable/vectorized instructions) |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 16% |
all | 13% |
load | NA (no load vectorizable/vectorized instructions) |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 14% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
PUSH %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
PUSH %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
PUSH %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
PUSH %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
PUSH %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
AND $-0x20,%RSP | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
SUB $0x1c0,%RSP | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOVSXD 0xc8(%RDI),%R8 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
TEST %R8,%R8 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 45959a | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOVSXD 0xc4(%RDI),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
ADD $0x7,%RAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV %RAX,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
AND $-0x8,%RCX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
LEA 0x7f(%RCX),%RDX | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
SHR $0x7,%RDX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 |
CMP $0x2,%RDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV $0x1,%EBX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
CMOVB %RBX,%RDX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 |
MOV %RDX,0x28(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %RCX,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV %RCX,0x18(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
DEC %RCX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
CMP $-0x80,%RCX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JAE 45959a | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
LEA 0x7(%R8),%RDX | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x8(%RDI),%RCX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x40(%RDI),%RBX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RBX,0x60(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0xa8(%RDI),%RBX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RBX,0x58(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0x610(%RCX),%RBX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x610(%RSI),%RCX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RCX,0x78(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOVSXD 0x18(%RSP),%RCX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
LEA (%RBX,%RCX,8),%RSI | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RSI,0x90(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
ADD %ECX,%ECX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOVSXD %ECX,%RCX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
LEA (%RBX,%RCX,8),%R9 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
SHR $0x3,%RAX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 |
CMP $0x2,%R8 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV $0x1,%ECX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
CMOVAE %R8,%RCX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 |
DEC %RCX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV %RCX,0x70(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
LEA (,%RAX,8),%RSI | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA (%RSI,%RSI,2),%RCX | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RCX,0x50(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %EAX,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
SAL $0x4,%ESI | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 |
MOVSXD %ESI,%RCX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV %RCX,0x48(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
SAL $0x3,%EAX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 |
CLTQ | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.50 |
MOV %RAX,0x40(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
DECQ 0x28(%RSP) | 3 | 0.25 | 0.25 | 0.83 | 0.83 | 1 | 0.25 | 0.25 | 0.33 | 5 | 1 |
LEA (%RDX,%RDX,1),%RAX | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
AND $-0x10,%RAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV %RAX,0x38(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
AND $-0x8,%RDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV %RDX,0x68(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
XOR %R8D,%R8D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
JMP 458ff4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
POP %RBX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R12 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R13 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R14 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %RBP | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
VZEROUPPER | 4 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
RET | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 | 0 | 1 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼qmcplusplus::DistanceTableBA | 0.17 | 0.1 |
▼Loop 975 - DistanceTableBA.h:88-102 - exec– | 0 | 0 |
▼Loop 976 - DistanceTableBA.h:89-99 - exec– | 0 | 0 |
○Loop 977 - DistanceTableBA.h:99-99 - exec | 0.17 | 0.1 |