Function: qmcplusplus::DistanceTableBA<double, 3u, 39>::evaluate(qmcplusplus::ParticleSet&) | Module: exec | Source: DistanceTableBA.h:65-105 [...] | Coverage: 0.11% |
---|
Function: qmcplusplus::DistanceTableBA<double, 3u, 39>::evaluate(qmcplusplus::ParticleSet&) | Module: exec | Source: DistanceTableBA.h:65-105 [...] | Coverage: 0.11% |
---|
/home/kcamus/qaas_runs/169-451-1869/intel/miniqmc/build/miniqmc/src/Particle/Lattice/ParticleBConds.h: 235 - 278 |
-------------------------------------------------------------------------------- |
235: const T* restrict py = R0 + r0_stride; |
236: const T* restrict pz = R0 + r0_stride * 2; |
[...] |
249: const T flip = iat < flip_ind ? one : minusone; |
250: const T displ_0 = (px[iat] - x0) * flip; |
251: const T displ_1 = (py[iat] - y0) * flip; |
252: const T displ_2 = (pz[iat] - z0) * flip; |
253: |
254: const T ar_0 = -std::floor(displ_0 * g00 + displ_1 * g10 + displ_2 * g20); |
255: const T ar_1 = -std::floor(displ_0 * g01 + displ_1 * g11 + displ_2 * g21); |
256: const T ar_2 = -std::floor(displ_0 * g02 + displ_1 * g12 + displ_2 * g22); |
257: |
258: const T delx = displ_0 + ar_0 * r00 + ar_1 * r10 + ar_2 * r20; |
259: const T dely = displ_1 + ar_0 * r01 + ar_1 * r11 + ar_2 * r21; |
260: const T delz = displ_2 + ar_0 * r02 + ar_1 * r12 + ar_2 * r22; |
261: |
262: T rmin = delx * delx + dely * dely + delz * delz; |
263: int ic = 0; |
264: #pragma unroll(7) |
265: for (int c = 1; c < 8; ++c) |
266: { |
267: const T x = delx + cellx[c]; |
268: const T y = dely + celly[c]; |
269: const T z = delz + cellz[c]; |
270: const T r2 = x * x + y * y + z * z; |
271: ic = (r2 < rmin) ? c : ic; |
272: rmin = (r2 < rmin) ? r2 : rmin; |
273: } |
274: |
275: temp_r[iat] = std::sqrt(rmin); |
276: dx[iat] = flip * (delx + cellx[ic]); |
277: dy[iat] = flip * (dely + celly[ic]); |
278: dz[iat] = flip * (delz + cellz[ic]); |
/home/kcamus/qaas_runs/169-451-1869/intel/miniqmc/build/miniqmc/src/Particle/DistanceTableBA.h: 65 - 105 |
-------------------------------------------------------------------------------- |
65: inline void evaluate(ParticleSet& P) |
[...] |
73: const size_t ntgt_local = Ntargets; |
74: const size_t ntgt_padded = getAlignedSize<T>(Ntargets); |
75: const size_t nsrc_padded = getAlignedSize<T>(Nsources); |
[...] |
83: const size_t num_teams = (nsrc_padded + ChunkSizePerTeam - 1) / ChunkSizePerTeam; |
84: |
85: PRAGMA_OFFLOAD("omp target teams distribute collapse(2) num_teams(Ntargets * num_teams) \ |
86: map(to: src_ptr[:D*Origin->RSoA.capacity()], tgt_ptr[:D*P.RSoA.capacity()]) \ |
87: map(from: dist_ptr[:Distances.size()], displ_ptr[:memoryPool.size()])") |
88: for (size_t iat = 0; iat < ntgt_local; ++iat) |
89: for (size_t team_id = 0; team_id < num_teams; ++team_id) |
90: { |
91: T pos[D]; |
92: for (int idim = 0; idim < D; idim++) |
93: pos[idim] = *(tgt_ptr + ntgt_padded * idim + iat); |
94: |
95: const size_t first = ChunkSizePerTeam * team_id; |
96: const size_t last = std::min(first + ChunkSizePerTeam, nsrc_padded); |
97: |
98: PRAGMA_OFFLOAD("omp parallel for") |
99: for (size_t jel = first; jel < last; ++jel) |
100: { |
101: DTD_BConds<T, D, SC>::computeDistancesOffload(pos, src_ptr, nsrc_padded, dist_ptr + nsrc_padded * iat, |
102: displ_ptr + nsrc_padded * D * iat, nsrc_padded, jel); |
103: } |
104: } |
105: } |
/home/kcamus/qaas_runs/169-451-1869/intel/miniqmc/build/miniqmc/src/Numerics/OhmmsPETE/OhmmsVector.h: 248 - 248 |
-------------------------------------------------------------------------------- |
248: inline pointer data() { return X; } |
/home/kcamus/qaas_runs/169-451-1869/intel/miniqmc/build/miniqmc/src/Platforms/CPU/SIMD/aligned_allocator.hpp: 46 - 46 |
-------------------------------------------------------------------------------- |
46: return ((n + ND - 1) / ND) * ND; |
/usr/include/c++/13.1.1/bits/stl_vector.h: 1258 - 1258 |
-------------------------------------------------------------------------------- |
1258: { return _M_data_ptr(this->_M_impl._M_start); } |
/usr/include/c++/13.1.1/bits/stl_algobase.h: 238 - 238 |
-------------------------------------------------------------------------------- |
238: if (__b < __a) |
/home/kcamus/qaas_runs/169-451-1869/intel/miniqmc/build/miniqmc/src/Numerics/OhmmsPETE/VectorSoAContainer.h: 237 - 239 |
-------------------------------------------------------------------------------- |
237: T* data() { return myData; } |
238: ///return the base |
239: const T* data() const { return myData; } |
0x4983f0 PUSH %RBP |
0x4983f1 MOV %RSI,%R8 |
0x4983f4 MOV %RDI,%R9 |
0x4983f7 MOV %RSP,%RBP |
0x4983fa PUSH %R15 |
0x4983fc PUSH %R14 |
0x4983fe PUSH %R13 |
0x498400 PUSH %R12 |
0x498402 PUSH %RBX |
0x498403 SUB $0x78,%RSP |
0x498407 MOVSXD 0xc4(%RDI),%RSI |
0x49840e MOVSXD 0xc8(%RDI),%RDX |
0x498415 MOV 0x8(%R9),%RCX |
0x498419 MOV 0x40(%RDI),%R10 |
0x49841d ADD $0x7,%RSI |
0x498421 LEA 0x7(%RDX),%RAX |
0x498425 MOV 0xa8(%RDI),%RDI |
0x49842c MOV 0x610(%R8),%R14 |
0x498433 AND $-0x8,%RSI |
0x498437 MOV 0x610(%RCX),%R15 |
0x49843e AND $-0x8,%RAX |
0x498442 LEA 0x7f(%RSI),%RCX |
0x498446 SHR $0x7,%RCX |
0x49844a TEST %RDX,%RDX |
0x49844d JE 4989f4 |
0x498453 LEA (%RSI,%RSI,1),%EBX |
0x498456 SAL $0x3,%RAX |
0x49845a LEA (%RSI,%RSI,2),%R12 |
0x49845e MOV %RSI,-0xf0(%RBP) |
0x498465 MOVSXD %EBX,%R13 |
0x498468 LEA (%R14,%RAX,1),%RBX |
0x49846c LEA (,%R12,8),%R8 |
0x498474 MOV %RDI,%R12 |
0x498477 LEA (,%RSI,8),%R11 |
0x49847f ADD %RBX,%RAX |
0x498482 MOV %RCX,%RDI |
0x498485 MOV %RBX,-0xe0(%RBP) |
0x49848c SAL $0x7,%RDI |
0x498490 MOV %R11,-0x100(%RBP) |
0x498497 VMOVSD 0x37a91(%RIP),%XMM10 |
0x49849f MOVSXD %ESI,%R11 |
0x4984a2 MOV %R8,-0x108(%RBP) |
0x4984a9 XOR %R8D,%R8D |
0x4984ac MOV %RAX,-0xd8(%RBP) |
0x4984b3 XOR %EAX,%EAX |
0x4984b5 MOV %RDI,-0xe8(%RBP) |
0x4984bc MOV %RAX,%RDI |
0x4984bf MOV %R14,-0xf8(%RBP) |
0x4984c6 MOV %R11,%R14 |
0x4984c9 MOV %R13,%R11 |
0x4984cc MOV %R8,%R13 |
(1147) 0x4984cf TEST %RCX,%RCX |
(1147) 0x4984d2 JE 4989d4 |
(1147) 0x4984d8 MOV %RDX,-0x110(%RBP) |
(1147) 0x4984df XOR %EAX,%EAX |
(1147) 0x4984e1 MOV %R10,%RBX |
(1147) 0x4984e4 MOV %RCX,-0x118(%RBP) |
(1147) 0x4984eb NOPL (%RAX,%RAX,1) |
(1148) 0x4984f0 MOV -0xf0(%RBP),%R8 |
(1148) 0x4984f7 MOV %RAX,%RCX |
(1148) 0x4984fa SUB $-0x80,%RAX |
(1148) 0x4984fe MOV -0xf8(%RBP),%RDX |
(1148) 0x498505 MOV -0xe0(%RBP),%R10 |
(1148) 0x49850c MOV -0xd8(%RBP),%RSI |
(1148) 0x498513 CMP %R8,%RAX |
(1148) 0x498516 VMOVSD (%RDX,%RDI,8),%XMM9 |
(1148) 0x49851b CMOVBE %RAX,%R8 |
(1148) 0x49851f VMOVSD (%R10,%RDI,8),%XMM8 |
(1148) 0x498525 VMOVSD (%RSI,%RDI,8),%XMM7 |
(1148) 0x49852a CMP %R8,%RCX |
(1148) 0x49852d JAE 4989b3 |
(1148) 0x498533 VMOVSD 0x208(%R9),%XMM5 |
(1148) 0x49853c VMOVSD 0x190(%R9),%XMM0 |
(1148) 0x498545 VMOVSD 0x1d0(%R9),%XMM1 |
(1148) 0x49854e VMOVSD 0x210(%R9),%XMM2 |
(1148) 0x498557 VMOVSD 0x198(%R9),%XMM3 |
(1148) 0x498560 VMOVSD 0x1d8(%R9),%XMM4 |
(1148) 0x498569 VMOVSD %XMM5,-0x30(%RBP) |
(1148) 0x49856e VMOVSD %XMM0,-0x98(%RBP) |
(1148) 0x498576 VMOVSD 0xf0(%R9),%XMM31 |
(1148) 0x49857d VMOVSD 0xf8(%R9),%XMM30 |
(1148) 0x498584 VMOVSD %XMM1,-0xa0(%RBP) |
(1148) 0x49858c VMOVSD 0x100(%R9),%XMM29 |
(1148) 0x498593 VMOVSD 0x108(%R9),%XMM28 |
(1148) 0x49859a VMOVSD %XMM2,-0xc0(%RBP) |
(1148) 0x4985a2 VMOVSD 0x110(%R9),%XMM27 |
(1148) 0x4985a9 VMOVSD 0x118(%R9),%XMM26 |
(1148) 0x4985b0 VMOVSD %XMM3,-0x90(%RBP) |
(1148) 0x4985b8 VMOVSD 0x120(%R9),%XMM25 |
(1148) 0x4985bf VMOVSD 0x128(%R9),%XMM24 |
(1148) 0x4985c6 VMOVSD %XMM4,-0x88(%RBP) |
(1148) 0x4985ce VMOVSD 0x130(%R9),%XMM23 |
(1148) 0x4985d5 VMOVSD 0x138(%R9),%XMM22 |
(1148) 0x4985dc VMOVSD 0x140(%R9),%XMM21 |
(1148) 0x4985e3 VMOVSD 0x148(%R9),%XMM20 |
(1148) 0x4985ea VMOVSD 0x150(%R9),%XMM19 |
(1148) 0x4985f1 VMOVSD 0x158(%R9),%XMM18 |
(1148) 0x4985f8 VMOVSD 0x160(%R9),%XMM17 |
(1148) 0x4985ff VMOVSD 0x168(%R9),%XMM16 |
(1148) 0x498606 VMOVSD 0x170(%R9),%XMM15 |
(1148) 0x49860f VMOVSD 0x178(%R9),%XMM14 |
(1148) 0x498618 VMOVSD 0x188(%R9),%XMM13 |
(1148) 0x498621 VMOVSD 0x1c8(%R9),%XMM12 |
(1148) 0x49862a VMOVSD 0x218(%R9),%XMM6 |
(1148) 0x498633 VMOVSD 0x1a0(%R9),%XMM11 |
(1148) 0x49863c MOV %RAX,-0xc8(%RBP) |
(1148) 0x498643 VMOVSD 0x1e0(%R9),%XMM5 |
(1148) 0x49864c VMOVSD 0x220(%R9),%XMM0 |
(1148) 0x498655 MOV %RDI,-0xd0(%RBP) |
(1148) 0x49865c VMOVSD 0x1a8(%R9),%XMM1 |
(1148) 0x498665 VMOVSD 0x1e8(%R9),%XMM2 |
(1148) 0x49866e VMOVSD %XMM6,-0x80(%RBP) |
(1148) 0x498673 VMOVSD 0x228(%R9),%XMM3 |
(1148) 0x49867c VMOVSD 0x1b0(%R9),%XMM4 |
(1148) 0x498685 VMOVSD %XMM11,-0x78(%RBP) |
(1148) 0x49868a VMOVSD 0x1f0(%R9),%XMM6 |
(1148) 0x498693 VMOVSD 0x230(%R9),%XMM11 |
(1148) 0x49869c VMOVSD %XMM5,-0x70(%RBP) |
(1148) 0x4986a1 VMOVSD %XMM0,-0x68(%RBP) |
(1148) 0x4986a6 VMOVSD 0x1b8(%R9),%XMM5 |
(1148) 0x4986af VMOVSD 0x1f8(%R9),%XMM0 |
(1148) 0x4986b8 VMOVSD %XMM1,-0x60(%RBP) |
(1148) 0x4986bd VMOVSD 0x238(%R9),%XMM1 |
(1148) 0x4986c6 VMOVSD %XMM2,-0x58(%RBP) |
(1148) 0x4986cb VMOVSD %XMM3,-0x50(%RBP) |
(1148) 0x4986d0 VMOVSD %XMM4,-0x48(%RBP) |
(1148) 0x4986d5 VMOVSD %XMM6,-0x40(%RBP) |
(1148) 0x4986da VMOVSD %XMM11,-0x38(%RBP) |
(1148) 0x4986df VMOVSD %XMM5,-0xb8(%RBP) |
(1148) 0x4986e7 VMOVSD %XMM0,-0xb0(%RBP) |
(1148) 0x4986ef VMOVSD %XMM1,-0xa8(%RBP) |
(1148) 0x4986f7 NOPW (%RAX,%RAX,1) |
(1149) 0x498700 MOVSXD %ECX,%RDX |
(1149) 0x498703 VMOVSD %XMM10,%XMM10,%XMM5 |
(1149) 0x498707 TEST %ECX,%ECX |
(1149) 0x498709 JNS 498717 |
(1149) 0x49870b MOV 0x378ce(%RIP),%RAX |
(1149) 0x498712 VMOVQ %RAX,%XMM5 |
(1149) 0x498717 LEA (%RDX,%R14,1),%RDI |
(1149) 0x49871b VMOVSD (%R15,%RDX,8),%XMM2 |
(1149) 0x498721 LEA (%R11,%RDX,1),%RSI |
(1149) 0x498725 VMOVSD (%R15,%RDI,8),%XMM4 |
(1149) 0x49872b VMOVSD (%R15,%RSI,8),%XMM11 |
(1149) 0x498731 VSUBSD %XMM9,%XMM2,%XMM3 |
(1149) 0x498736 VSUBSD %XMM8,%XMM4,%XMM6 |
(1149) 0x49873b VSUBSD %XMM7,%XMM11,%XMM0 |
(1149) 0x49873f VMULSD %XMM5,%XMM3,%XMM3 |
(1149) 0x498743 VMULSD %XMM5,%XMM6,%XMM2 |
(1149) 0x498747 VMULSD %XMM5,%XMM0,%XMM0 |
(1149) 0x49874b VMULSD %XMM27,%XMM2,%XMM4 |
(1149) 0x498751 VMULSD %XMM30,%XMM2,%XMM1 |
(1149) 0x498757 VFMADD231SD %XMM28,%XMM3,%XMM4 |
(1149) 0x49875d VFMADD231SD %XMM31,%XMM3,%XMM1 |
(1149) 0x498763 VFMADD231SD %XMM26,%XMM0,%XMM4 |
(1149) 0x498769 VFMADD231SD %XMM29,%XMM0,%XMM1 |
(1149) 0x49876f VRNDSCALESD $0x9,%XMM4,%XMM4,%XMM6 |
(1149) 0x498776 VMULSD %XMM24,%XMM2,%XMM4 |
(1149) 0x49877c VRNDSCALESD $0x9,%XMM1,%XMM1,%XMM11 |
(1149) 0x498783 VXORPD 0x36525(%RIP),%XMM6,%XMM1 |
(1149) 0x49878b VMULSD %XMM21,%XMM1,%XMM6 |
(1149) 0x498791 VFMADD231SD %XMM25,%XMM3,%XMM4 |
(1149) 0x498797 VFNMADD231SD %XMM22,%XMM11,%XMM6 |
(1149) 0x49879d VFMADD231SD %XMM23,%XMM0,%XMM4 |
(1149) 0x4987a3 VRNDSCALESD $0x9,%XMM4,%XMM4,%XMM4 |
(1149) 0x4987aa VFNMADD231SD %XMM20,%XMM4,%XMM3 |
(1149) 0x4987b0 VFNMADD231SD %XMM17,%XMM4,%XMM2 |
(1149) 0x4987b6 VFNMADD231SD %XMM14,%XMM4,%XMM0 |
(1149) 0x4987bb VADDSD %XMM3,%XMM6,%XMM3 |
(1149) 0x4987bf VMULSD %XMM18,%XMM1,%XMM6 |
(1149) 0x4987c5 VMULSD %XMM15,%XMM1,%XMM1 |
(1149) 0x4987ca VFNMADD231SD %XMM19,%XMM11,%XMM6 |
(1149) 0x4987d0 VFNMADD231SD %XMM16,%XMM11,%XMM1 |
(1149) 0x4987d6 VADDSD %XMM13,%XMM3,%XMM11 |
(1149) 0x4987db VADDSD %XMM2,%XMM6,%XMM2 |
(1149) 0x4987df VADDSD %XMM0,%XMM1,%XMM0 |
(1149) 0x4987e3 VADDSD %XMM12,%XMM2,%XMM6 |
(1149) 0x4987e8 VMULSD %XMM2,%XMM2,%XMM4 |
(1149) 0x4987ec VADDSD -0x30(%RBP),%XMM0,%XMM1 |
(1149) 0x4987f1 VMULSD %XMM6,%XMM6,%XMM6 |
(1149) 0x4987f5 VFMADD231SD %XMM3,%XMM3,%XMM4 |
(1149) 0x4987fa VFMADD132SD %XMM11,%XMM6,%XMM11 |
(1149) 0x4987ff VADDSD -0xc0(%RBP),%XMM0,%XMM6 |
(1149) 0x498807 VFMADD231SD %XMM0,%XMM0,%XMM4 |
(1149) 0x49880c VFMADD132SD %XMM1,%XMM11,%XMM1 |
(1149) 0x498811 VADDSD -0xa0(%RBP),%XMM2,%XMM11 |
(1149) 0x498819 VMULSD %XMM11,%XMM11,%XMM11 |
(1149) 0x49881e VCOMISD %XMM1,%XMM4 |
(1149) 0x498822 VMINSD %XMM1,%XMM4,%XMM4 |
(1149) 0x498826 VADDSD -0x98(%RBP),%XMM3,%XMM1 |
(1149) 0x49882e VFMADD132SD %XMM1,%XMM11,%XMM1 |
(1149) 0x498833 VADDSD -0x88(%RBP),%XMM2,%XMM11 |
(1149) 0x49883b SETA %R10B |
(1149) 0x49883f MOVZX %R10B,%EAX |
(1149) 0x498843 MOV $0x2,%R10D |
(1149) 0x498849 VMULSD %XMM11,%XMM11,%XMM11 |
(1149) 0x49884e VFMADD231SD %XMM6,%XMM6,%XMM1 |
(1149) 0x498853 VADDSD -0x80(%RBP),%XMM0,%XMM6 |
(1149) 0x498858 VCOMISD %XMM1,%XMM4 |
(1149) 0x49885c VMINSD %XMM4,%XMM1,%XMM1 |
(1149) 0x498860 VADDSD -0x90(%RBP),%XMM3,%XMM4 |
(1149) 0x498868 VFMADD132SD %XMM4,%XMM11,%XMM4 |
(1149) 0x49886d VADDSD -0x70(%RBP),%XMM2,%XMM11 |
(1149) 0x498872 CMOVA %R10,%RAX |
(1149) 0x498876 MOV $0x3,%R10D |
(1149) 0x49887c VMULSD %XMM11,%XMM11,%XMM11 |
(1149) 0x498881 VFMADD231SD %XMM6,%XMM6,%XMM4 |
(1149) 0x498886 VADDSD -0x68(%RBP),%XMM0,%XMM6 |
(1149) 0x49888b VCOMISD %XMM4,%XMM1 |
(1149) 0x49888f VMINSD %XMM1,%XMM4,%XMM4 |
(1149) 0x498893 VADDSD -0x78(%RBP),%XMM3,%XMM1 |
(1149) 0x498898 VFMADD132SD %XMM1,%XMM11,%XMM1 |
(1149) 0x49889d VADDSD -0x58(%RBP),%XMM2,%XMM11 |
(1149) 0x4988a2 CMOVA %R10,%RAX |
(1149) 0x4988a6 MOV $0x4,%R10D |
(1149) 0x4988ac VMULSD %XMM11,%XMM11,%XMM11 |
(1149) 0x4988b1 VFMADD231SD %XMM6,%XMM6,%XMM1 |
(1149) 0x4988b6 VADDSD -0x50(%RBP),%XMM0,%XMM6 |
(1149) 0x4988bb VCOMISD %XMM1,%XMM4 |
(1149) 0x4988bf VMINSD %XMM4,%XMM1,%XMM1 |
(1149) 0x4988c3 VADDSD -0x60(%RBP),%XMM3,%XMM4 |
(1149) 0x4988c8 VFMADD132SD %XMM4,%XMM11,%XMM4 |
(1149) 0x4988cd VADDSD -0x40(%RBP),%XMM2,%XMM11 |
(1149) 0x4988d2 CMOVA %R10,%RAX |
(1149) 0x4988d6 MOV $0x5,%R10D |
(1149) 0x4988dc VMULSD %XMM11,%XMM11,%XMM11 |
(1149) 0x4988e1 VFMADD231SD %XMM6,%XMM6,%XMM4 |
(1149) 0x4988e6 VADDSD -0x38(%RBP),%XMM0,%XMM6 |
(1149) 0x4988eb VCOMISD %XMM4,%XMM1 |
(1149) 0x4988ef VMINSD %XMM1,%XMM4,%XMM4 |
(1149) 0x4988f3 VADDSD -0x48(%RBP),%XMM3,%XMM1 |
(1149) 0x4988f8 VFMADD132SD %XMM1,%XMM11,%XMM1 |
(1149) 0x4988fd VADDSD -0xb0(%RBP),%XMM2,%XMM11 |
(1149) 0x498905 CMOVA %R10,%RAX |
(1149) 0x498909 MOV $0x6,%R10D |
(1149) 0x49890f VMULSD %XMM11,%XMM11,%XMM11 |
(1149) 0x498914 VFMADD231SD %XMM6,%XMM6,%XMM1 |
(1149) 0x498919 VADDSD -0xa8(%RBP),%XMM0,%XMM6 |
(1149) 0x498921 VCOMISD %XMM1,%XMM4 |
(1149) 0x498925 VMINSD %XMM4,%XMM1,%XMM1 |
(1149) 0x498929 VADDSD -0xb8(%RBP),%XMM3,%XMM4 |
(1149) 0x498931 VFMADD132SD %XMM4,%XMM11,%XMM4 |
(1149) 0x498936 CMOVA %R10,%RAX |
(1149) 0x49893a MOV $0x7,%R10D |
(1149) 0x498940 VFMADD231SD %XMM6,%XMM6,%XMM4 |
(1149) 0x498945 VCOMISD %XMM4,%XMM1 |
(1149) 0x498949 VMINSD %XMM4,%XMM1,%XMM1 |
(1149) 0x49894d CMOVA %R10,%RAX |
(1149) 0x498951 VSQRTSD %XMM1,%XMM1,%XMM1 |
(1149) 0x498955 LEA (%RBX,%RDX,8),%R10 |
(1149) 0x498959 INC %RCX |
(1149) 0x49895c LEA (%R9,%RAX,8),%RAX |
(1149) 0x498960 VADDSD 0x180(%RAX),%XMM3,%XMM3 |
(1149) 0x498968 VADDSD 0x1c0(%RAX),%XMM2,%XMM2 |
(1149) 0x498970 VMOVSD %XMM1,(%R10,%R13,1) |
(1149) 0x498976 VADDSD 0x200(%RAX),%XMM0,%XMM0 |
(1149) 0x49897e VMULSD %XMM5,%XMM3,%XMM4 |
(1149) 0x498982 VMULSD %XMM5,%XMM2,%XMM6 |
(1149) 0x498986 VMULSD %XMM5,%XMM0,%XMM5 |
(1149) 0x49898a VMOVSD %XMM4,(%R12,%RDX,8) |
(1149) 0x498990 VMOVSD %XMM6,(%R12,%RDI,8) |
(1149) 0x498996 VMOVSD %XMM5,(%R12,%RSI,8) |
(1149) 0x49899c CMP %R8,%RCX |
(1149) 0x49899f JNE 498700 |
(1148) 0x4989a5 MOV -0xc8(%RBP),%RAX |
(1148) 0x4989ac MOV -0xd0(%RBP),%RDI |
(1148) 0x4989b3 MOV -0xe8(%RBP),%RCX |
(1148) 0x4989ba CMP %RCX,%RAX |
(1148) 0x4989bd JNE 4984f0 |
(1147) 0x4989c3 MOV -0x110(%RBP),%RDX |
(1147) 0x4989ca MOV -0x118(%RBP),%RCX |
(1147) 0x4989d1 MOV %RBX,%R10 |
(1147) 0x4989d4 MOV -0x100(%RBP),%RBX |
(1147) 0x4989db MOV -0x108(%RBP),%R8 |
(1147) 0x4989e2 INC %RDI |
(1147) 0x4989e5 ADD %RBX,%R13 |
(1147) 0x4989e8 ADD %R8,%R12 |
(1147) 0x4989eb CMP %RDI,%RDX |
(1147) 0x4989ee JNE 4984cf |
0x4989f4 ADD $0x78,%RSP |
0x4989f8 POP %RBX |
0x4989f9 POP %R12 |
0x4989fb POP %R13 |
0x4989fd POP %R14 |
0x4989ff POP %R15 |
0x498a01 POP %RBP |
0x498a02 RET |
0x498a03 NOPW %CS:(%RAX,%RAX,1) |
0x498a0d NOPL (%RAX) |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►100.00+ | qmcplusplus::ParticleSet::upda[...] | ParticleSet.cpp:250 | exec |
○ | main._omp_fn.0 | miniqmc.cpp:390 | exec |
○ | GOMP_parallel | libgomp.h:985 | libgomp.so.1.0.0 |
Path / |
Source file and lines | DistanceTableBA.h:65-105 |
Module | exec |
nb instructions | 61 |
nb uops | 61 |
loop length | 251 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 1 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 7 |
micro-operation queue | 15.25 cycles |
front end | 15.25 cycles |
Metric | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 |
---|---|---|---|---|---|---|---|---|
uops | 5.25 | 5.25 | 9.33 | 9.33 | 13.00 | 5.25 | 5.25 | 9.33 |
cycles | 5.25 | 5.75 | 9.33 | 9.33 | 13.00 | 5.25 | 5.25 | 9.33 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 15.37 |
Stall cycles | 0.00 |
Front-end | 15.25 |
Dispatch | 13.00 |
Overall L1 | 15.25 |
all | 0% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 0% |
all | 0% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | NA (no other vectorizable/vectorized instructions) |
all | 0% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
all | 12% |
load | 12% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 12% |
all | 12% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | NA (no other vectorizable/vectorized instructions) |
all | 12% |
load | 12% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 12% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %RSI,%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV %RDI,%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
PUSH %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
PUSH %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
PUSH %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
PUSH %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
PUSH %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
SUB $0x78,%RSP | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOVSXD 0xc4(%RDI),%RSI | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOVSXD 0xc8(%RDI),%RDX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x8(%R9),%RCX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x40(%RDI),%R10 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
ADD $0x7,%RSI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
LEA 0x7(%RDX),%RAX | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0xa8(%RDI),%RDI | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x610(%R8),%R14 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
AND $-0x8,%RSI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV 0x610(%RCX),%R15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
AND $-0x8,%RAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
LEA 0x7f(%RSI),%RCX | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
SHR $0x7,%RCX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 |
TEST %RDX,%RDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 4989f4 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
LEA (%RSI,%RSI,1),%EBX | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
SAL $0x3,%RAX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 |
LEA (%RSI,%RSI,2),%R12 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RSI,-0xf0(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOVSXD %EBX,%R13 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
LEA (%R14,%RAX,1),%RBX | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA (,%R12,8),%R8 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RDI,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
LEA (,%RSI,8),%R11 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
ADD %RBX,%RAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV %RCX,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV %RBX,-0xe0(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
SAL $0x7,%RDI | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 |
MOV %R11,-0x100(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VMOVSD 0x37a91(%RIP),%XMM10 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOVSXD %ESI,%R11 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV %R8,-0x108(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
XOR %R8D,%R8D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV %RAX,-0xd8(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV %RDI,-0xe8(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %RAX,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV %R14,-0xf8(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %R11,%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV %R13,%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV %R8,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
ADD $0x78,%RSP | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
POP %RBX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R12 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R13 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R14 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %RBP | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
RET | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 | 0 | 1 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
Source file and lines | DistanceTableBA.h:65-105 |
Module | exec |
nb instructions | 61 |
nb uops | 61 |
loop length | 251 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 1 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 7 |
micro-operation queue | 15.25 cycles |
front end | 15.25 cycles |
Metric | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 |
---|---|---|---|---|---|---|---|---|
uops | 5.25 | 5.25 | 9.33 | 9.33 | 13.00 | 5.25 | 5.25 | 9.33 |
cycles | 5.25 | 5.75 | 9.33 | 9.33 | 13.00 | 5.25 | 5.25 | 9.33 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 15.37 |
Stall cycles | 0.00 |
Front-end | 15.25 |
Dispatch | 13.00 |
Overall L1 | 15.25 |
all | 0% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 0% |
all | 0% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | NA (no other vectorizable/vectorized instructions) |
all | 0% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
all | 12% |
load | 12% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 12% |
all | 12% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | NA (no other vectorizable/vectorized instructions) |
all | 12% |
load | 12% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 12% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %RSI,%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV %RDI,%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
PUSH %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
PUSH %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
PUSH %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
PUSH %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
PUSH %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
SUB $0x78,%RSP | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOVSXD 0xc4(%RDI),%RSI | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOVSXD 0xc8(%RDI),%RDX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x8(%R9),%RCX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x40(%RDI),%R10 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
ADD $0x7,%RSI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
LEA 0x7(%RDX),%RAX | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0xa8(%RDI),%RDI | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x610(%R8),%R14 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
AND $-0x8,%RSI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV 0x610(%RCX),%R15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
AND $-0x8,%RAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
LEA 0x7f(%RSI),%RCX | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
SHR $0x7,%RCX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 |
TEST %RDX,%RDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 4989f4 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
LEA (%RSI,%RSI,1),%EBX | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
SAL $0x3,%RAX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 |
LEA (%RSI,%RSI,2),%R12 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RSI,-0xf0(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOVSXD %EBX,%R13 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
LEA (%R14,%RAX,1),%RBX | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA (,%R12,8),%R8 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RDI,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
LEA (,%RSI,8),%R11 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
ADD %RBX,%RAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV %RCX,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV %RBX,-0xe0(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
SAL $0x7,%RDI | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 |
MOV %R11,-0x100(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VMOVSD 0x37a91(%RIP),%XMM10 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOVSXD %ESI,%R11 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV %R8,-0x108(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
XOR %R8D,%R8D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV %RAX,-0xd8(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV %RDI,-0xe8(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %RAX,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV %R14,-0xf8(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %R11,%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV %R13,%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV %R8,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
ADD $0x78,%RSP | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
POP %RBX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R12 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R13 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R14 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %RBP | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
RET | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 | 0 | 1 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼qmcplusplus::DistanceTableBA | 0.11 | 0.1 |
▼Loop 1147 - DistanceTableBA.h:88-101 - exec– | 0 | 0 |
▼Loop 1148 - DistanceTableBA.h:89-101 - exec– | 0 | 0 |
○Loop 1149 - DistanceTableBA.h:99-101 - exec | 0.11 | 0.1 |