Function: void qmcplusplus::DTD_BConds<double, 3u, 39>::computeDistances<qmcplusplus::TinyVector<dou ... | Module: exec | Source: ParticleBConds.h:159-219 [...] | Coverage: 43.62% |
---|
Function: void qmcplusplus::DTD_BConds<double, 3u, 39>::computeDistances<qmcplusplus::TinyVector<dou ... | Module: exec | Source: ParticleBConds.h:159-219 [...] | Coverage: 43.62% |
---|
/home/kcamus/qaas_runs/169-451-1869/intel/miniqmc/build/miniqmc/src/Particle/Lattice/ParticleBConds.h: 159 - 219 |
-------------------------------------------------------------------------------- |
159: void computeDistances(const PT& pos, |
[...] |
167: const T x0 = pos[0]; |
168: const T y0 = pos[1]; |
169: const T z0 = pos[2]; |
[...] |
188: const T flip = iat < flip_ind ? one : minusone; |
189: const T displ_0 = (px[iat] - x0) * flip; |
190: const T displ_1 = (py[iat] - y0) * flip; |
191: const T displ_2 = (pz[iat] - z0) * flip; |
192: |
193: const T ar_0 = -std::floor(displ_0 * g00 + displ_1 * g10 + displ_2 * g20); |
194: const T ar_1 = -std::floor(displ_0 * g01 + displ_1 * g11 + displ_2 * g21); |
195: const T ar_2 = -std::floor(displ_0 * g02 + displ_1 * g12 + displ_2 * g22); |
196: |
197: const T delx = displ_0 + ar_0 * r00 + ar_1 * r10 + ar_2 * r20; |
198: const T dely = displ_1 + ar_0 * r01 + ar_1 * r11 + ar_2 * r21; |
199: const T delz = displ_2 + ar_0 * r02 + ar_1 * r12 + ar_2 * r22; |
200: |
201: T rmin = delx * delx + dely * dely + delz * delz; |
202: int ic = 0; |
203: #pragma unroll(7) |
204: for (int c = 1; c < 8; ++c) |
205: { |
206: const T x = delx + cellx[c]; |
207: const T y = dely + celly[c]; |
208: const T z = delz + cellz[c]; |
209: const T r2 = x * x + y * y + z * z; |
210: ic = (r2 < rmin) ? c : ic; |
211: rmin = (r2 < rmin) ? r2 : rmin; |
212: } |
213: |
214: temp_r[iat] = std::sqrt(rmin); |
215: dx[iat] = flip * (delx + cellx[ic]); |
216: dy[iat] = flip * (dely + celly[ic]); |
217: dz[iat] = flip * (delz + cellz[ic]); |
218: } |
219: } |
/home/kcamus/qaas_runs/169-451-1869/intel/miniqmc/build/miniqmc/src/Numerics/OhmmsPETE/VectorSoAContainer.h: 241 - 243 |
-------------------------------------------------------------------------------- |
241: T* restrict data(size_t i) { return myData + i * nGhosts; } |
242: ///return the const pointer of the i-th components |
243: const T* restrict data(size_t i) const { return myData + i * nGhosts; } |
0x4917a0 PUSH %RBP |
0x4917a1 MOV %RSI,%R11 |
0x4917a4 MOV %RDX,%RSI |
0x4917a7 MOV %RDI,%RAX |
0x4917aa SAL $0x3,%RSI |
0x4917ae LEA (,%R9,8),%RDI |
0x4917b6 MOV %RSP,%RBP |
0x4917b9 PUSH %R15 |
0x4917bb MOV 0x20(%RBP),%EDX |
0x4917be PUSH %R14 |
0x4917c0 PUSH %R13 |
0x4917c2 MOV 0x18(%RBP),%R13D |
0x4917c6 PUSH %R12 |
0x4917c8 MOV 0x10(%RBP),%R12 |
0x4917cc PUSH %RBX |
0x4917cd LEA (%R12,%RDI,1),%RBX |
0x4917d1 ADD %RBX,%RDI |
0x4917d4 VMOVSD (%R11),%XMM5 |
0x4917d9 VMOVSD 0x8(%R11),%XMM6 |
0x4917df VMOVSD 0x10(%R11),%XMM7 |
0x4917e5 LEA (%RCX,%RSI,1),%R11 |
0x4917e9 ADD %R11,%RSI |
0x4917ec CMP %R13D,%EDX |
0x4917ef JLE 491add |
0x4917f5 MOV %RCX,%R10 |
0x4917f8 SUB %R13D,%EDX |
0x4917fb MOVSXD %R13D,%RCX |
0x4917fe VMOVQ 0x3d4aa(%RIP),%XMM9 |
0x491806 VMOVSD 0x3e722(%RIP),%XMM8 |
0x49180e VMOVSD 0x3e7ca(%RIP),%XMM10 |
0x491816 LEA (%RDX,%RCX,1),%R9 |
0x49181a MOV $0x2,%R15D |
0x491820 MOV $0x3,%R14D |
0x491826 NOPW %CS:(%RAX,%RAX,1) |
(1107) 0x491830 VMOVSD %XMM8,%XMM8,%XMM11 |
(1107) 0x491835 CMP %ECX,0x28(%RBP) |
(1107) 0x491838 JLE 49183f |
(1107) 0x49183a VMOVSD %XMM10,%XMM10,%XMM11 |
(1107) 0x49183f VMOVSD (%R11,%RCX,8),%XMM12 |
(1107) 0x491845 VMOVSD (%R10,%RCX,8),%XMM1 |
(1107) 0x49184b VMOVSD (%RSI,%RCX,8),%XMM13 |
(1107) 0x491850 VSUBSD %XMM6,%XMM12,%XMM2 |
(1107) 0x491854 VSUBSD %XMM5,%XMM1,%XMM0 |
(1107) 0x491858 VSUBSD %XMM7,%XMM13,%XMM3 |
(1107) 0x49185c VMULSD %XMM11,%XMM2,%XMM15 |
(1107) 0x491861 VMULSD %XMM11,%XMM0,%XMM14 |
(1107) 0x491866 VMULSD %XMM11,%XMM3,%XMM12 |
(1107) 0x49186b VMULSD 0x20(%RAX),%XMM15,%XMM1 |
(1107) 0x491870 VMULSD 0x8(%RAX),%XMM15,%XMM4 |
(1107) 0x491875 VMULSD 0x38(%RAX),%XMM15,%XMM3 |
(1107) 0x49187a VFMADD231SD 0x18(%RAX),%XMM14,%XMM1 |
(1107) 0x491880 VFMADD231SD (%RAX),%XMM14,%XMM4 |
(1107) 0x491885 VFMADD231SD 0x30(%RAX),%XMM14,%XMM3 |
(1107) 0x49188b VFMADD231SD 0x28(%RAX),%XMM12,%XMM1 |
(1107) 0x491891 VFMADD231SD 0x10(%RAX),%XMM12,%XMM4 |
(1107) 0x491897 VFMADD231SD 0x40(%RAX),%XMM12,%XMM3 |
(1107) 0x49189d VRNDSCALESD $0x9,%XMM1,%XMM1,%XMM0 |
(1107) 0x4918a4 VXORPD %XMM9,%XMM0,%XMM2 |
(1107) 0x4918a9 VMULSD 0x68(%RAX),%XMM2,%XMM0 |
(1107) 0x4918ae VRNDSCALESD $0x9,%XMM4,%XMM4,%XMM13 |
(1107) 0x4918b5 VMULSD 0x50(%RAX),%XMM2,%XMM1 |
(1107) 0x4918ba VRNDSCALESD $0x9,%XMM3,%XMM3,%XMM4 |
(1107) 0x4918c1 VFNMADD231SD 0x70(%RAX),%XMM4,%XMM15 |
(1107) 0x4918c7 VMULSD 0x80(%RAX),%XMM2,%XMM2 |
(1107) 0x4918cf VFNMADD231SD 0x88(%RAX),%XMM4,%XMM12 |
(1107) 0x4918d8 VFNMADD231SD 0x58(%RAX),%XMM4,%XMM14 |
(1107) 0x4918de VFNMADD231SD 0x60(%RAX),%XMM13,%XMM0 |
(1107) 0x4918e4 VFNMADD231SD 0x48(%RAX),%XMM13,%XMM1 |
(1107) 0x4918ea VFNMADD132SD 0x78(%RAX),%XMM2,%XMM13 |
(1107) 0x4918f0 VADDSD %XMM15,%XMM0,%XMM15 |
(1107) 0x4918f5 VADDSD %XMM14,%XMM1,%XMM14 |
(1107) 0x4918fa VADDSD %XMM12,%XMM13,%XMM13 |
(1107) 0x4918ff VADDSD 0xd8(%RAX),%XMM15,%XMM12 |
(1107) 0x491907 VMULSD %XMM15,%XMM15,%XMM1 |
(1107) 0x49190c VADDSD 0x98(%RAX),%XMM14,%XMM4 |
(1107) 0x491914 VADDSD 0x118(%RAX),%XMM13,%XMM3 |
(1107) 0x49191c VMULSD %XMM12,%XMM12,%XMM0 |
(1107) 0x491921 VFMADD231SD %XMM14,%XMM14,%XMM1 |
(1107) 0x491926 VFMADD132SD %XMM4,%XMM0,%XMM4 |
(1107) 0x49192b VFMADD231SD %XMM13,%XMM13,%XMM1 |
(1107) 0x491930 VFMADD132SD %XMM3,%XMM4,%XMM3 |
(1107) 0x491935 VADDSD 0xe0(%RAX),%XMM15,%XMM4 |
(1107) 0x49193d VMULSD %XMM4,%XMM4,%XMM12 |
(1107) 0x491941 VADDSD 0x128(%RAX),%XMM13,%XMM4 |
(1107) 0x491949 VMINSD %XMM3,%XMM1,%XMM2 |
(1107) 0x49194d VCOMISD %XMM3,%XMM1 |
(1107) 0x491951 VADDSD 0xa0(%RAX),%XMM14,%XMM1 |
(1107) 0x491959 VADDSD 0x120(%RAX),%XMM13,%XMM3 |
(1107) 0x491961 VFMADD132SD %XMM1,%XMM12,%XMM1 |
(1107) 0x491966 SETA %R13B |
(1107) 0x49196a VADDSD 0xf0(%RAX),%XMM15,%XMM12 |
(1107) 0x491972 MOVZX %R13B,%EDX |
(1107) 0x491976 MOV $0x4,%R13D |
(1107) 0x49197c VFMADD132SD %XMM3,%XMM1,%XMM3 |
(1107) 0x491981 VADDSD 0xe8(%RAX),%XMM15,%XMM1 |
(1107) 0x491989 VCOMISD %XMM3,%XMM2 |
(1107) 0x49198d VMINSD %XMM2,%XMM3,%XMM0 |
(1107) 0x491991 VADDSD 0xa8(%RAX),%XMM14,%XMM2 |
(1107) 0x491999 VMULSD %XMM1,%XMM1,%XMM3 |
(1107) 0x49199d VADDSD 0xb0(%RAX),%XMM14,%XMM1 |
(1107) 0x4919a5 CMOVA %R15,%RDX |
(1107) 0x4919a9 VFMADD132SD %XMM2,%XMM3,%XMM2 |
(1107) 0x4919ae VFMADD132SD %XMM4,%XMM2,%XMM4 |
(1107) 0x4919b3 VADDSD 0x130(%RAX),%XMM13,%XMM2 |
(1107) 0x4919bb VADDSD 0x138(%RAX),%XMM13,%XMM3 |
(1107) 0x4919c3 VCOMISD %XMM4,%XMM0 |
(1107) 0x4919c7 VMINSD %XMM0,%XMM4,%XMM0 |
(1107) 0x4919cb VMULSD %XMM12,%XMM12,%XMM4 |
(1107) 0x4919d0 VADDSD 0xf8(%RAX),%XMM15,%XMM12 |
(1107) 0x4919d8 CMOVA %R14,%RDX |
(1107) 0x4919dc VFMADD132SD %XMM1,%XMM4,%XMM1 |
(1107) 0x4919e1 VADDSD 0xc0(%RAX),%XMM14,%XMM4 |
(1107) 0x4919e9 VFMADD132SD %XMM2,%XMM1,%XMM2 |
(1107) 0x4919ee VADDSD 0xb8(%RAX),%XMM14,%XMM1 |
(1107) 0x4919f6 VCOMISD %XMM2,%XMM0 |
(1107) 0x4919fa VMINSD %XMM0,%XMM2,%XMM0 |
(1107) 0x4919fe VMULSD %XMM12,%XMM12,%XMM2 |
(1107) 0x491a03 CMOVA %R13,%RDX |
(1107) 0x491a07 MOV $0x5,%R13D |
(1107) 0x491a0d VFMADD132SD %XMM1,%XMM2,%XMM1 |
(1107) 0x491a12 VFMADD132SD %XMM3,%XMM1,%XMM3 |
(1107) 0x491a17 VADDSD 0x100(%RAX),%XMM15,%XMM1 |
(1107) 0x491a1f VMULSD %XMM1,%XMM1,%XMM12 |
(1107) 0x491a23 VADDSD 0x148(%RAX),%XMM13,%XMM1 |
(1107) 0x491a2b VCOMISD %XMM3,%XMM0 |
(1107) 0x491a2f VMINSD %XMM0,%XMM3,%XMM0 |
(1107) 0x491a33 VADDSD 0x140(%RAX),%XMM13,%XMM3 |
(1107) 0x491a3b VFMADD132SD %XMM4,%XMM12,%XMM4 |
(1107) 0x491a40 CMOVA %R13,%RDX |
(1107) 0x491a44 MOV $0x6,%R13D |
(1107) 0x491a4a VFMADD132SD %XMM3,%XMM4,%XMM3 |
(1107) 0x491a4f VADDSD 0x108(%RAX),%XMM15,%XMM4 |
(1107) 0x491a57 VMINSD %XMM0,%XMM3,%XMM2 |
(1107) 0x491a5b VCOMISD %XMM3,%XMM0 |
(1107) 0x491a5f VADDSD 0xc8(%RAX),%XMM14,%XMM0 |
(1107) 0x491a67 VMULSD %XMM4,%XMM4,%XMM3 |
(1107) 0x491a6b CMOVA %R13,%RDX |
(1107) 0x491a6f MOV $0x7,%R13D |
(1107) 0x491a75 VFMADD132SD %XMM0,%XMM3,%XMM0 |
(1107) 0x491a7a VFMADD132SD %XMM1,%XMM0,%XMM1 |
(1107) 0x491a7f VCOMISD %XMM1,%XMM2 |
(1107) 0x491a83 VMINSD %XMM1,%XMM2,%XMM12 |
(1107) 0x491a87 CMOVA %R13,%RDX |
(1107) 0x491a8b VSQRTSD %XMM12,%XMM12,%XMM12 |
(1107) 0x491a90 LEA (%RAX,%RDX,8),%RDX |
(1107) 0x491a94 VADDSD 0x90(%RDX),%XMM14,%XMM14 |
(1107) 0x491a9c VMOVSD %XMM12,(%R8,%RCX,8) |
(1107) 0x491aa2 VMULSD %XMM11,%XMM14,%XMM2 |
(1107) 0x491aa7 VMOVSD %XMM2,(%R12,%RCX,8) |
(1107) 0x491aad VADDSD 0xd0(%RDX),%XMM15,%XMM15 |
(1107) 0x491ab5 VMULSD %XMM11,%XMM15,%XMM0 |
(1107) 0x491aba VMOVSD %XMM0,(%RBX,%RCX,8) |
(1107) 0x491abf VADDSD 0x110(%RDX),%XMM13,%XMM13 |
(1107) 0x491ac7 VMULSD %XMM11,%XMM13,%XMM11 |
(1107) 0x491acc VMOVSD %XMM11,(%RDI,%RCX,8) |
(1107) 0x491ad1 INC %RCX |
(1107) 0x491ad4 CMP %RCX,%R9 |
(1107) 0x491ad7 JNE 491830 |
0x491add POP %RBX |
0x491ade POP %R12 |
0x491ae0 POP %R13 |
0x491ae2 POP %R14 |
0x491ae4 POP %R15 |
0x491ae6 POP %RBP |
0x491ae7 RET |
0x491ae8 NOPL (%RAX,%RAX,1) |
0x495820 PUSH %RBP |
0x495821 MOV %RSI,%R11 |
0x495824 MOV %RDX,%RSI |
0x495827 MOV %RDI,%RAX |
0x49582a SAL $0x3,%RSI |
0x49582e LEA (,%R9,8),%RDI |
0x495836 MOV %RSP,%RBP |
0x495839 PUSH %R15 |
0x49583b MOV 0x20(%RBP),%EDX |
0x49583e PUSH %R14 |
0x495840 PUSH %R13 |
0x495842 MOV 0x18(%RBP),%R13D |
0x495846 PUSH %R12 |
0x495848 MOV 0x10(%RBP),%R12 |
0x49584c PUSH %RBX |
0x49584d LEA (%R12,%RDI,1),%RBX |
0x495851 ADD %RBX,%RDI |
0x495854 VMOVSD (%R11),%XMM5 |
0x495859 VMOVSD 0x8(%R11),%XMM6 |
0x49585f VMOVSD 0x10(%R11),%XMM7 |
0x495865 LEA (%RCX,%RSI,1),%R11 |
0x495869 ADD %R11,%RSI |
0x49586c CMP %R13D,%EDX |
0x49586f JLE 495b5d |
0x495875 MOV %RCX,%R10 |
0x495878 SUB %R13D,%EDX |
0x49587b MOVSXD %R13D,%RCX |
0x49587e VMOVQ 0x3942a(%RIP),%XMM9 |
0x495886 VMOVSD 0x3a6a2(%RIP),%XMM8 |
0x49588e VMOVSD 0x3a74a(%RIP),%XMM10 |
0x495896 LEA (%RDX,%RCX,1),%R9 |
0x49589a MOV $0x2,%R15D |
0x4958a0 MOV $0x3,%R14D |
0x4958a6 NOPW %CS:(%RAX,%RAX,1) |
(1106) 0x4958b0 VMOVSD %XMM8,%XMM8,%XMM11 |
(1106) 0x4958b5 CMP %ECX,0x28(%RBP) |
(1106) 0x4958b8 JLE 4958bf |
(1106) 0x4958ba VMOVSD %XMM10,%XMM10,%XMM11 |
(1106) 0x4958bf VMOVSD (%R11,%RCX,8),%XMM12 |
(1106) 0x4958c5 VMOVSD (%R10,%RCX,8),%XMM1 |
(1106) 0x4958cb VMOVSD (%RSI,%RCX,8),%XMM13 |
(1106) 0x4958d0 VSUBSD %XMM6,%XMM12,%XMM2 |
(1106) 0x4958d4 VSUBSD %XMM5,%XMM1,%XMM0 |
(1106) 0x4958d8 VSUBSD %XMM7,%XMM13,%XMM3 |
(1106) 0x4958dc VMULSD %XMM11,%XMM2,%XMM15 |
(1106) 0x4958e1 VMULSD %XMM11,%XMM0,%XMM14 |
(1106) 0x4958e6 VMULSD %XMM11,%XMM3,%XMM12 |
(1106) 0x4958eb VMULSD 0x20(%RAX),%XMM15,%XMM1 |
(1106) 0x4958f0 VMULSD 0x8(%RAX),%XMM15,%XMM4 |
(1106) 0x4958f5 VMULSD 0x38(%RAX),%XMM15,%XMM3 |
(1106) 0x4958fa VFMADD231SD 0x18(%RAX),%XMM14,%XMM1 |
(1106) 0x495900 VFMADD231SD (%RAX),%XMM14,%XMM4 |
(1106) 0x495905 VFMADD231SD 0x30(%RAX),%XMM14,%XMM3 |
(1106) 0x49590b VFMADD231SD 0x28(%RAX),%XMM12,%XMM1 |
(1106) 0x495911 VFMADD231SD 0x10(%RAX),%XMM12,%XMM4 |
(1106) 0x495917 VFMADD231SD 0x40(%RAX),%XMM12,%XMM3 |
(1106) 0x49591d VRNDSCALESD $0x9,%XMM1,%XMM1,%XMM0 |
(1106) 0x495924 VXORPD %XMM9,%XMM0,%XMM2 |
(1106) 0x495929 VMULSD 0x68(%RAX),%XMM2,%XMM0 |
(1106) 0x49592e VRNDSCALESD $0x9,%XMM4,%XMM4,%XMM13 |
(1106) 0x495935 VMULSD 0x50(%RAX),%XMM2,%XMM1 |
(1106) 0x49593a VRNDSCALESD $0x9,%XMM3,%XMM3,%XMM4 |
(1106) 0x495941 VFNMADD231SD 0x70(%RAX),%XMM4,%XMM15 |
(1106) 0x495947 VMULSD 0x80(%RAX),%XMM2,%XMM2 |
(1106) 0x49594f VFNMADD231SD 0x88(%RAX),%XMM4,%XMM12 |
(1106) 0x495958 VFNMADD231SD 0x58(%RAX),%XMM4,%XMM14 |
(1106) 0x49595e VFNMADD231SD 0x60(%RAX),%XMM13,%XMM0 |
(1106) 0x495964 VFNMADD231SD 0x48(%RAX),%XMM13,%XMM1 |
(1106) 0x49596a VFNMADD132SD 0x78(%RAX),%XMM2,%XMM13 |
(1106) 0x495970 VADDSD %XMM15,%XMM0,%XMM15 |
(1106) 0x495975 VADDSD %XMM14,%XMM1,%XMM14 |
(1106) 0x49597a VADDSD %XMM12,%XMM13,%XMM13 |
(1106) 0x49597f VADDSD 0xd8(%RAX),%XMM15,%XMM12 |
(1106) 0x495987 VMULSD %XMM15,%XMM15,%XMM1 |
(1106) 0x49598c VADDSD 0x98(%RAX),%XMM14,%XMM4 |
(1106) 0x495994 VADDSD 0x118(%RAX),%XMM13,%XMM3 |
(1106) 0x49599c VMULSD %XMM12,%XMM12,%XMM0 |
(1106) 0x4959a1 VFMADD231SD %XMM14,%XMM14,%XMM1 |
(1106) 0x4959a6 VFMADD132SD %XMM4,%XMM0,%XMM4 |
(1106) 0x4959ab VFMADD231SD %XMM13,%XMM13,%XMM1 |
(1106) 0x4959b0 VFMADD132SD %XMM3,%XMM4,%XMM3 |
(1106) 0x4959b5 VADDSD 0xe0(%RAX),%XMM15,%XMM4 |
(1106) 0x4959bd VMULSD %XMM4,%XMM4,%XMM12 |
(1106) 0x4959c1 VADDSD 0x128(%RAX),%XMM13,%XMM4 |
(1106) 0x4959c9 VMINSD %XMM3,%XMM1,%XMM2 |
(1106) 0x4959cd VCOMISD %XMM3,%XMM1 |
(1106) 0x4959d1 VADDSD 0xa0(%RAX),%XMM14,%XMM1 |
(1106) 0x4959d9 VADDSD 0x120(%RAX),%XMM13,%XMM3 |
(1106) 0x4959e1 VFMADD132SD %XMM1,%XMM12,%XMM1 |
(1106) 0x4959e6 SETA %R13B |
(1106) 0x4959ea VADDSD 0xf0(%RAX),%XMM15,%XMM12 |
(1106) 0x4959f2 MOVZX %R13B,%EDX |
(1106) 0x4959f6 MOV $0x4,%R13D |
(1106) 0x4959fc VFMADD132SD %XMM3,%XMM1,%XMM3 |
(1106) 0x495a01 VADDSD 0xe8(%RAX),%XMM15,%XMM1 |
(1106) 0x495a09 VCOMISD %XMM3,%XMM2 |
(1106) 0x495a0d VMINSD %XMM2,%XMM3,%XMM0 |
(1106) 0x495a11 VADDSD 0xa8(%RAX),%XMM14,%XMM2 |
(1106) 0x495a19 VMULSD %XMM1,%XMM1,%XMM3 |
(1106) 0x495a1d VADDSD 0xb0(%RAX),%XMM14,%XMM1 |
(1106) 0x495a25 CMOVA %R15,%RDX |
(1106) 0x495a29 VFMADD132SD %XMM2,%XMM3,%XMM2 |
(1106) 0x495a2e VFMADD132SD %XMM4,%XMM2,%XMM4 |
(1106) 0x495a33 VADDSD 0x130(%RAX),%XMM13,%XMM2 |
(1106) 0x495a3b VADDSD 0x138(%RAX),%XMM13,%XMM3 |
(1106) 0x495a43 VCOMISD %XMM4,%XMM0 |
(1106) 0x495a47 VMINSD %XMM0,%XMM4,%XMM0 |
(1106) 0x495a4b VMULSD %XMM12,%XMM12,%XMM4 |
(1106) 0x495a50 VADDSD 0xf8(%RAX),%XMM15,%XMM12 |
(1106) 0x495a58 CMOVA %R14,%RDX |
(1106) 0x495a5c VFMADD132SD %XMM1,%XMM4,%XMM1 |
(1106) 0x495a61 VADDSD 0xc0(%RAX),%XMM14,%XMM4 |
(1106) 0x495a69 VFMADD132SD %XMM2,%XMM1,%XMM2 |
(1106) 0x495a6e VADDSD 0xb8(%RAX),%XMM14,%XMM1 |
(1106) 0x495a76 VCOMISD %XMM2,%XMM0 |
(1106) 0x495a7a VMINSD %XMM0,%XMM2,%XMM0 |
(1106) 0x495a7e VMULSD %XMM12,%XMM12,%XMM2 |
(1106) 0x495a83 CMOVA %R13,%RDX |
(1106) 0x495a87 MOV $0x5,%R13D |
(1106) 0x495a8d VFMADD132SD %XMM1,%XMM2,%XMM1 |
(1106) 0x495a92 VFMADD132SD %XMM3,%XMM1,%XMM3 |
(1106) 0x495a97 VADDSD 0x100(%RAX),%XMM15,%XMM1 |
(1106) 0x495a9f VMULSD %XMM1,%XMM1,%XMM12 |
(1106) 0x495aa3 VADDSD 0x148(%RAX),%XMM13,%XMM1 |
(1106) 0x495aab VCOMISD %XMM3,%XMM0 |
(1106) 0x495aaf VMINSD %XMM0,%XMM3,%XMM0 |
(1106) 0x495ab3 VADDSD 0x140(%RAX),%XMM13,%XMM3 |
(1106) 0x495abb VFMADD132SD %XMM4,%XMM12,%XMM4 |
(1106) 0x495ac0 CMOVA %R13,%RDX |
(1106) 0x495ac4 MOV $0x6,%R13D |
(1106) 0x495aca VFMADD132SD %XMM3,%XMM4,%XMM3 |
(1106) 0x495acf VADDSD 0x108(%RAX),%XMM15,%XMM4 |
(1106) 0x495ad7 VMINSD %XMM0,%XMM3,%XMM2 |
(1106) 0x495adb VCOMISD %XMM3,%XMM0 |
(1106) 0x495adf VADDSD 0xc8(%RAX),%XMM14,%XMM0 |
(1106) 0x495ae7 VMULSD %XMM4,%XMM4,%XMM3 |
(1106) 0x495aeb CMOVA %R13,%RDX |
(1106) 0x495aef MOV $0x7,%R13D |
(1106) 0x495af5 VFMADD132SD %XMM0,%XMM3,%XMM0 |
(1106) 0x495afa VFMADD132SD %XMM1,%XMM0,%XMM1 |
(1106) 0x495aff VCOMISD %XMM1,%XMM2 |
(1106) 0x495b03 VMINSD %XMM1,%XMM2,%XMM12 |
(1106) 0x495b07 CMOVA %R13,%RDX |
(1106) 0x495b0b VSQRTSD %XMM12,%XMM12,%XMM12 |
(1106) 0x495b10 LEA (%RAX,%RDX,8),%RDX |
(1106) 0x495b14 VADDSD 0x90(%RDX),%XMM14,%XMM14 |
(1106) 0x495b1c VMOVSD %XMM12,(%R8,%RCX,8) |
(1106) 0x495b22 VMULSD %XMM11,%XMM14,%XMM2 |
(1106) 0x495b27 VMOVSD %XMM2,(%R12,%RCX,8) |
(1106) 0x495b2d VADDSD 0xd0(%RDX),%XMM15,%XMM15 |
(1106) 0x495b35 VMULSD %XMM11,%XMM15,%XMM0 |
(1106) 0x495b3a VMOVSD %XMM0,(%RBX,%RCX,8) |
(1106) 0x495b3f VADDSD 0x110(%RDX),%XMM13,%XMM13 |
(1106) 0x495b47 VMULSD %XMM11,%XMM13,%XMM11 |
(1106) 0x495b4c VMOVSD %XMM11,(%RDI,%RCX,8) |
(1106) 0x495b51 INC %RCX |
(1106) 0x495b54 CMP %RCX,%R9 |
(1106) 0x495b57 JNE 4958b0 |
0x495b5d POP %RBX |
0x495b5e POP %R12 |
0x495b60 POP %R13 |
0x495b62 POP %R14 |
0x495b64 POP %R15 |
0x495b66 POP %RBP |
0x495b67 RET |
0x495b68 NOPL (%RAX,%RAX,1) |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►30.22+ | qmcplusplus::DistanceTableAA<d[...] | DistanceTableAA.h:84 | exec |
○ | qmcplusplus::ParticleSet::make[...] | stl_vector.h:989 | exec |
○ | main._omp_fn.1 | stl_vector.h:1123 | exec |
○ | GOMP_parallel | libgomp.h:985 | libgomp.so.1.0.0 |
►24.43+ | qmcplusplus::DistanceTableAA<d[...] | DistanceTableAA.h:84 | exec |
○ | qmcplusplus::ParticleSet::make[...] | stl_vector.h:989 | exec |
○ | main._omp_fn.1 | refwrap.h:346 | exec |
○ | GOMP_parallel | libgomp.h:985 | libgomp.so.1.0.0 |
►15.21+ | qmcplusplus::DistanceTableAA<d[...] | DistanceTableAA.h:77 | exec |
○ | qmcplusplus::ParticleSet::setA[...] | ParticleSet.cpp:259 | exec |
○ | main._omp_fn.1 | stl_vector.h:1126 | exec |
○ | GOMP_parallel | libgomp.h:985 | libgomp.so.1.0.0 |
►14.89+ | qmcplusplus::DistanceTableAA<d[...] | DistanceTableAA.h:84 | exec |
○ | qmcplusplus::ParticleSet::make[...] | stl_vector.h:989 | exec |
○ | main._omp_fn.1 | stl_vector.h:1123 | exec |
○ | GOMP_parallel | libgomp.h:985 | libgomp.so.1.0.0 |
►4.95+ | qmcplusplus::DistanceTableAA<d[...] | DistanceTableAA.h:84 | exec |
○ | qmcplusplus::ParticleSet::make[...] | stl_vector.h:989 | exec |
○ | main._omp_fn.1 | miniqmc.cpp:484 | exec |
○ | GOMP_parallel | libgomp.h:985 | libgomp.so.1.0.0 |
►2.29+ | qmcplusplus::DistanceTableBA<d[...] | DistanceTableBA.h:120 | exec |
○ | qmcplusplus::ParticleSet::make[...] | stl_vector.h:989 | exec |
○ | main._omp_fn.1 | stl_vector.h:1123 | exec |
○ | GOMP_parallel | libgomp.h:985 | libgomp.so.1.0.0 |
►2.08+ | qmcplusplus::DistanceTableBA<d[...] | DistanceTableBA.h:120 | exec |
○ | qmcplusplus::ParticleSet::make[...] | stl_vector.h:989 | exec |
○ | main._omp_fn.1 | refwrap.h:346 | exec |
○ | GOMP_parallel | libgomp.h:985 | libgomp.so.1.0.0 |
►1.50+ | qmcplusplus::DistanceTableAA<d[...] | DistanceTableAA.h:67 | exec |
○ | qmcplusplus::ParticleSet::upda[...] | ParticleSet.cpp:250 | exec |
○ | main._omp_fn.0 | miniqmc.cpp:390 | exec |
○ | GOMP_parallel | libgomp.h:985 | libgomp.so.1.0.0 |
►1.48+ | qmcplusplus::DistanceTableAA<d[...] | DistanceTableAA.h:67 | exec |
○ | qmcplusplus::ParticleSet::upda[...] | ParticleSet.cpp:250 | exec |
○ | main._omp_fn.0 | miniqmc.cpp:390 | exec |
○ | GOMP_parallel | libgomp.h:985 | libgomp.so.1.0.0 |
►1.32+ | qmcplusplus::DistanceTableBA<d[...] | DistanceTableBA.h:120 | exec |
○ | qmcplusplus::ParticleSet::make[...] | stl_vector.h:989 | exec |
○ | main._omp_fn.1 | stl_vector.h:1123 | exec |
○ | GOMP_parallel | libgomp.h:985 | libgomp.so.1.0.0 |
►1.20+ | qmcplusplus::DistanceTableBA<d[...] | DistanceTableBA.h:114 | exec |
○ | qmcplusplus::ParticleSet::setA[...] | ParticleSet.cpp:259 | exec |
○ | main._omp_fn.1 | stl_vector.h:1126 | exec |
○ | GOMP_parallel | libgomp.h:985 | libgomp.so.1.0.0 |
Path / |
Source file and lines | ParticleBConds.h:159-219 |
Module | exec |
nb instructions | 84 |
nb uops | 84 |
loop length | 326 |
used x86 registers | 15 |
used mmx registers | 0 |
used xmm registers | 6 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 3 |
micro-operation queue | 21.00 cycles |
front end | 21.00 cycles |
 | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 |
---|---|---|---|---|---|---|---|---|
uops | 7.00 | 7.00 | 15.00 | 15.00 | 12.00 | 7.00 | 7.00 | 14.00 |
cycles | 7.00 | 7.50 | 15.00 | 15.00 | 12.00 | 7.00 | 7.00 | 14.00 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 21.10 |
Stall cycles | 0.00 |
Front-end | 21.00 |
Dispatch | 15.00 |
Overall L1 | 21.00 |
all | 0% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 0% |
all | 0% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | NA (no other vectorizable/vectorized instructions) |
all | 0% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
all | 8% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 7% |
all | 12% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | NA (no other vectorizable/vectorized instructions) |
all | 10% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 7% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %RSI,%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV %RDX,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV %RDI,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
SAL $0x3,%RSI | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 |
LEA (,%R9,8),%RDI | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
PUSH %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0x20(%RBP),%EDX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
PUSH %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0x18(%RBP),%R13D | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0x10(%RBP),%R12 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
LEA (%R12,%RDI,1),%RBX | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
ADD %RBX,%RDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VMOVSD (%R11),%XMM5 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD 0x8(%R11),%XMM6 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD 0x10(%R11),%XMM7 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
LEA (%RCX,%RSI,1),%R11 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
ADD %R11,%RSI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
CMP %R13D,%EDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JLE 491add <_ZNK11qmcplusplus10DTD_BCondsIdLj3ELi39EE16computeDistancesINS_10TinyVectorIdLj3EEENS_18VectorSoAContainerIdLj3ELm64ENS_10MallocatorIdLm64EEEEES8_EEvRKT_RKT0_PdRT1_iii.isra.0+0x33d> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOV %RCX,%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
SUB %R13D,%EDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOVSXD %R13D,%RCX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VMOVQ 0x3d4aa(%RIP),%XMM9 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD 0x3e722(%RIP),%XMM8 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD 0x3e7ca(%RIP),%XMM10 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
LEA (%RDX,%RCX,1),%R9 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV $0x2,%R15D | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV $0x3,%R14D | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
POP %RBX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R12 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R13 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R14 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %RBP | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
RET | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 | 0 | 1 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
PUSH %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %RSI,%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV %RDX,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV %RDI,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
SAL $0x3,%RSI | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 |
LEA (,%R9,8),%RDI | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
PUSH %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0x20(%RBP),%EDX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
PUSH %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0x18(%RBP),%R13D | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0x10(%RBP),%R12 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
LEA (%R12,%RDI,1),%RBX | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
ADD %RBX,%RDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VMOVSD (%R11),%XMM5 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD 0x8(%R11),%XMM6 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD 0x10(%R11),%XMM7 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
LEA (%RCX,%RSI,1),%R11 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
ADD %R11,%RSI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
CMP %R13D,%EDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JLE 495b5d <_ZNK11qmcplusplus10DTD_BCondsIdLj3ELi39EE16computeDistancesINS_10TinyVectorIdLj3EEENS_18VectorSoAContainerIdLj3ELm64ENS_10MallocatorIdLm64EEEEES8_EEvRKT_RKT0_PdRT1_iii.isra.0+0x33d> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOV %RCX,%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
SUB %R13D,%EDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOVSXD %R13D,%RCX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VMOVQ 0x3942a(%RIP),%XMM9 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD 0x3a6a2(%RIP),%XMM8 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD 0x3a74a(%RIP),%XMM10 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
LEA (%RDX,%RCX,1),%R9 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV $0x2,%R15D | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV $0x3,%R14D | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
POP %RBX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R12 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R13 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R14 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %RBP | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
RET | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 | 0 | 1 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
Source file and lines | ParticleBConds.h:159-219 |
Module | exec |
nb instructions | 84 |
nb uops | 84 |
loop length | 326 |
used x86 registers | 15 |
used mmx registers | 0 |
used xmm registers | 6 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 3 |
micro-operation queue | 21.00 cycles |
front end | 21.00 cycles |
 | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 |
---|---|---|---|---|---|---|---|---|
uops | 7.00 | 7.00 | 15.00 | 15.00 | 12.00 | 7.00 | 7.00 | 14.00 |
cycles | 7.00 | 7.50 | 15.00 | 15.00 | 12.00 | 7.00 | 7.00 | 14.00 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 21.10 |
Stall cycles | 0.00 |
Front-end | 21.00 |
Dispatch | 15.00 |
Overall L1 | 21.00 |
all | 0% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 0% |
all | 0% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | NA (no other vectorizable/vectorized instructions) |
all | 0% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
all | 8% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 7% |
all | 12% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | NA (no other vectorizable/vectorized instructions) |
all | 10% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 7% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %RSI,%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV %RDX,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV %RDI,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
SAL $0x3,%RSI | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 |
LEA (,%R9,8),%RDI | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
PUSH %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0x20(%RBP),%EDX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
PUSH %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0x18(%RBP),%R13D | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0x10(%RBP),%R12 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
LEA (%R12,%RDI,1),%RBX | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
ADD %RBX,%RDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VMOVSD (%R11),%XMM5 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD 0x8(%R11),%XMM6 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD 0x10(%R11),%XMM7 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
LEA (%RCX,%RSI,1),%R11 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
ADD %R11,%RSI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
CMP %R13D,%EDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JLE 491add <_ZNK11qmcplusplus10DTD_BCondsIdLj3ELi39EE16computeDistancesINS_10TinyVectorIdLj3EEENS_18VectorSoAContainerIdLj3ELm64ENS_10MallocatorIdLm64EEEEES8_EEvRKT_RKT0_PdRT1_iii.isra.0+0x33d> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOV %RCX,%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
SUB %R13D,%EDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOVSXD %R13D,%RCX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VMOVQ 0x3d4aa(%RIP),%XMM9 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD 0x3e722(%RIP),%XMM8 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD 0x3e7ca(%RIP),%XMM10 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
LEA (%RDX,%RCX,1),%R9 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV $0x2,%R15D | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV $0x3,%R14D | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
POP %RBX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R12 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R13 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R14 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %RBP | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
RET | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 | 0 | 1 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
PUSH %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %RSI,%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV %RDX,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV %RDI,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
SAL $0x3,%RSI | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 |
LEA (,%R9,8),%RDI | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
PUSH %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0x20(%RBP),%EDX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
PUSH %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0x18(%RBP),%R13D | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0x10(%RBP),%R12 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
LEA (%R12,%RDI,1),%RBX | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
ADD %RBX,%RDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VMOVSD (%R11),%XMM5 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD 0x8(%R11),%XMM6 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD 0x10(%R11),%XMM7 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
LEA (%RCX,%RSI,1),%R11 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
ADD %R11,%RSI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
CMP %R13D,%EDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JLE 495b5d <_ZNK11qmcplusplus10DTD_BCondsIdLj3ELi39EE16computeDistancesINS_10TinyVectorIdLj3EEENS_18VectorSoAContainerIdLj3ELm64ENS_10MallocatorIdLm64EEEEES8_EEvRKT_RKT0_PdRT1_iii.isra.0+0x33d> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOV %RCX,%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
SUB %R13D,%EDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOVSXD %R13D,%RCX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VMOVQ 0x3942a(%RIP),%XMM9 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD 0x3a6a2(%RIP),%XMM8 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD 0x3a74a(%RIP),%XMM10 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
LEA (%RDX,%RCX,1),%R9 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV $0x2,%R15D | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV $0x3,%R14D | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
POP %RBX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R12 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R13 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R14 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %RBP | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
RET | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 | 0 | 1 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼void qmcplusplus::DTD_BConds | 43.62 | 42.54 |
○Loop 1107 - ParticleBConds.h:188-217 - exec | 40.42 | 39.43 |
○Loop 1106 - ParticleBConds.h:188-217 - exec | 3.19 | 3.11 |