Function: void qmcplusplus::DTD_BConds<double, 3u, 40>::computeDistances<qmcplusplus::TinyVector<dou ... | Module: libqmcparticle_omptarget.so | Source: ParticleBConds3DSoa.h:221-257 [...] | Coverage: 0.58% |
---|
Function: void qmcplusplus::DTD_BConds<double, 3u, 40>::computeDistances<qmcplusplus::TinyVector<dou ... | Module: libqmcparticle_omptarget.so | Source: ParticleBConds3DSoa.h:221-257 [...] | Coverage: 0.58% |
---|
/home/kcamus/qaas_runs/170-254-9426/intel/miniqmc/build/miniqmc/src/Particle/Lattice/ParticleBConds3DSoa.h: 221 - 257 |
-------------------------------------------------------------------------------- |
221: { |
[...] |
234: #pragma omp simd aligned(temp_r, px, py, pz, dx, dy, dz: QMC_SIMD_ALIGNMENT) |
235: for (int iat = first; iat < last; ++iat) |
236: { |
237: T displ_0 = px[iat] - x0; |
238: T displ_1 = py[iat] - y0; |
239: T displ_2 = pz[iat] - z0; |
240: |
241: T ar_0 = displ_0 * g00 + displ_1 * g10 + displ_2 * g20; |
242: T ar_1 = displ_0 * g01 + displ_1 * g11 + displ_2 * g21; |
243: T ar_2 = displ_0 * g02 + displ_1 * g12 + displ_2 * g22; |
244: |
245: //put them in the box |
246: ar_0 -= round(ar_0); |
247: ar_1 -= round(ar_1); |
248: ar_2 -= round(ar_2); |
249: |
250: //unit2cart |
251: dx[iat] = ar_0 * r00 + ar_1 * r10 + ar_2 * r20; |
252: dy[iat] = ar_0 * r01 + ar_1 * r11 + ar_2 * r21; |
253: dz[iat] = ar_0 * r02 + ar_1 * r12 + ar_2 * r22; |
254: |
255: temp_r[iat] = std::sqrt(dx[iat] * dx[iat] + dy[iat] * dy[iat] + dz[iat] * dz[iat]); |
256: } |
257: } |
0x2c740 PUSH %RBP |
0x2c741 MOV %RSP,%RBP |
0x2c744 PUSH %R15 |
0x2c746 PUSH %R14 |
0x2c748 PUSH %R13 |
0x2c74a PUSH %R12 |
0x2c74c PUSH %RBX |
0x2c74d MOV 0x18(%RBP),%EAX |
0x2c750 TEST %EAX,%EAX |
0x2c752 JLE 2cc3a |
0x2c758 VMOVSD (%RSI),%XMM0 |
0x2c75c VMOVSD 0x8(%RSI),%XMM1 |
0x2c761 VMOVSD 0x10(%RSI),%XMM2 |
0x2c766 MOV 0x10(%RBP),%RBX |
0x2c76a MOV %RDX,%R12 |
0x2c76d MOV %R9,%R13 |
0x2c770 MOV %RCX,%R14 |
0x2c773 SAL $0x4,%R12 |
0x2c777 SAL $0x4,%R13 |
0x2c77b LEA (%RCX,%RDX,8),%RCX |
0x2c77f XOR %R15D,%R15D |
0x2c782 LEA (%R14,%R12,1),%RSI |
0x2c786 LEA (%RBX,%R9,8),%R11 |
0x2c78a LEA (%RBX,%R13,1),%R10 |
0x2c78e CMP $0x4,%EAX |
0x2c791 JB 2cb00 |
0x2c797 XOR %R15D,%R15D |
0x2c79a CMP $0x8,%EAX |
0x2c79d JB 2c990 |
0x2c7a3 VPBROADCASTQ -0x24d3d(%RIP),%ZMM6 |
0x2c7ad VPBROADCASTQ -0x24da7(%RIP),%ZMM7 |
0x2c7b7 MOV %RCX,-0x40(%RBP) |
0x2c7bb MOV %EAX,%ECX |
0x2c7bd SHR $0x3,%RCX |
0x2c7c1 VBROADCASTSD %XMM0,%ZMM3 |
0x2c7c7 VBROADCASTSD %XMM1,%ZMM4 |
0x2c7cd VBROADCASTSD %XMM2,%ZMM5 |
0x2c7d3 MOV %EAX,%R15D |
0x2c7d6 AND $-0x8,%R15D |
0x2c7da MOV %R10,-0x30(%RBP) |
0x2c7de MOV %RDX,-0x50(%RBP) |
0x2c7e2 MOV %RSI,-0x38(%RBP) |
0x2c7e6 XOR %EDX,%EDX |
0x2c7e8 MOV %RBX,%R10 |
0x2c7eb SAL $0x6,%RCX |
0x2c7ef MOV %RCX,-0x48(%RBP) |
0x2c7f3 MOV %R14,%RCX |
0x2c7f6 NOPW %CS:(%RAX,%RAX,1) |
(219) 0x2c800 MOV %R13,%RSI |
(219) 0x2c803 MOV %R12,%R13 |
(219) 0x2c806 MOV -0x50(%RBP),%R12 |
(219) 0x2c80a VMOVAPD (%RCX),%ZMM8 |
(219) 0x2c810 VMOVDDUP 0x78(%RDI),%XMM13 |
(219) 0x2c815 VMOVAPD (%RCX,%R12,8),%ZMM9 |
(219) 0x2c81c MOV %R13,%R12 |
(219) 0x2c81f VMOVAPD (%RCX,%R12,1),%ZMM10 |
(219) 0x2c826 ADD $0x40,%RCX |
(219) 0x2c82a MOV %RSI,%R13 |
(219) 0x2c82d VSUBPD %ZMM3,%ZMM8,%ZMM8 |
(219) 0x2c833 VMULPD 0x48(%RDI){1to8},%ZMM8,%ZMM11 |
(219) 0x2c83a VMULPD 0x60(%RDI){1to8},%ZMM8,%ZMM12 |
(219) 0x2c841 VBROADCASTSD %XMM13,%ZMM13 |
(219) 0x2c847 VMULPD %ZMM8,%ZMM13,%ZMM8 |
(219) 0x2c84d VMOVDQA64 %ZMM7,%ZMM13 |
(219) 0x2c853 VSUBPD %ZMM4,%ZMM9,%ZMM9 |
(219) 0x2c859 VSUBPD %ZMM5,%ZMM10,%ZMM10 |
(219) 0x2c85f VFMADD231PD 0x50(%RDI){1to8},%ZMM9,%ZMM11 |
(219) 0x2c866 VFMADD231PD 0x68(%RDI){1to8},%ZMM9,%ZMM12 |
(219) 0x2c86d VFMADD231PD 0x80(%RDI){1to8},%ZMM9,%ZMM8 |
(219) 0x2c874 VMOVDQA64 %ZMM7,%ZMM9 |
(219) 0x2c87a VFMADD231PD 0x58(%RDI){1to8},%ZMM10,%ZMM11 |
(219) 0x2c881 VFMADD231PD 0x70(%RDI){1to8},%ZMM10,%ZMM12 |
(219) 0x2c888 VFMADD231PD 0x88(%RDI){1to8},%ZMM10,%ZMM8 |
(219) 0x2c88f VMOVDQA64 %ZMM7,%ZMM10 |
(219) 0x2c895 VPTERNLOGQ $-0x8,%ZMM6,%ZMM11,%ZMM9 |
(219) 0x2c89c VPTERNLOGQ $-0x8,%ZMM6,%ZMM12,%ZMM10 |
(219) 0x2c8a3 VPTERNLOGQ $-0x8,%ZMM6,%ZMM8,%ZMM13 |
(219) 0x2c8aa VADDPD %ZMM9,%ZMM11,%ZMM9 |
(219) 0x2c8b0 VADDPD %ZMM10,%ZMM12,%ZMM10 |
(219) 0x2c8b6 VRNDSCALEPD $0xb,%ZMM9,%ZMM9 |
(219) 0x2c8bd VRNDSCALEPD $0xb,%ZMM10,%ZMM10 |
(219) 0x2c8c4 VSUBPD %ZMM9,%ZMM11,%ZMM9 |
(219) 0x2c8ca VADDPD %ZMM13,%ZMM8,%ZMM11 |
(219) 0x2c8d0 VSUBPD %ZMM10,%ZMM12,%ZMM10 |
(219) 0x2c8d6 VMULPD (%RDI){1to8},%ZMM9,%ZMM12 |
(219) 0x2c8dc VRNDSCALEPD $0xb,%ZMM11,%ZMM11 |
(219) 0x2c8e3 VFMADD231PD 0x8(%RDI){1to8},%ZMM10,%ZMM12 |
(219) 0x2c8ea VSUBPD %ZMM11,%ZMM8,%ZMM8 |
(219) 0x2c8f0 VFMADD231PD 0x10(%RDI){1to8},%ZMM8,%ZMM12 |
(219) 0x2c8f7 VMOVAPD %ZMM12,(%R10) |
(219) 0x2c8fd VMULPD 0x18(%RDI){1to8},%ZMM9,%ZMM11 |
(219) 0x2c904 VFMADD231PD 0x20(%RDI){1to8},%ZMM10,%ZMM11 |
(219) 0x2c90b VFMADD231PD 0x28(%RDI){1to8},%ZMM8,%ZMM11 |
(219) 0x2c912 VMOVAPD %ZMM11,(%R10,%R9,8) |
(219) 0x2c919 VMULPD 0x30(%RDI){1to8},%ZMM9,%ZMM9 |
(219) 0x2c920 VFMADD231PD 0x38(%RDI){1to8},%ZMM10,%ZMM9 |
(219) 0x2c927 VFMADD231PD 0x40(%RDI){1to8},%ZMM8,%ZMM9 |
(219) 0x2c92e VMOVAPD %ZMM9,(%R10,%RSI,1) |
(219) 0x2c935 VMOVAPD (%R10),%ZMM8 |
(219) 0x2c93b VMOVAPD (%R10,%R9,8),%ZMM10 |
(219) 0x2c942 ADD $0x40,%R10 |
(219) 0x2c946 VMULPD %ZMM8,%ZMM8,%ZMM8 |
(219) 0x2c94c VFMADD231PD %ZMM9,%ZMM9,%ZMM8 |
(219) 0x2c952 VFMADD231PD %ZMM10,%ZMM10,%ZMM8 |
(219) 0x2c958 VSQRTPD %ZMM8,%ZMM8 |
(219) 0x2c95e VMOVAPD %ZMM8,(%R8,%RDX,1) |
(219) 0x2c965 ADD $0x40,%RDX |
(219) 0x2c969 CMP %RDX,-0x48(%RBP) |
(219) 0x2c96d JNE 2c800 |
0x2c973 MOV -0x40(%RBP),%RCX |
0x2c977 MOV -0x38(%RBP),%RSI |
0x2c97b MOV -0x30(%RBP),%R10 |
0x2c97f CMP %EAX,%R15D |
0x2c982 JE 2cc3a |
0x2c988 TEST $0x4,%AL |
0x2c98a JE 2cb00 |
0x2c990 VPBROADCASTQ -0x24f29(%RIP),%YMM6 |
0x2c999 VPBROADCASTQ -0x24f92(%RIP),%YMM7 |
0x2c9a2 VBROADCASTSD %XMM0,%YMM3 |
0x2c9a7 VBROADCASTSD %XMM1,%YMM4 |
0x2c9ac VBROADCASTSD %XMM2,%YMM5 |
0x2c9b1 MOV %R15D,%EDX |
0x2c9b4 MOV %EAX,%R15D |
0x2c9b7 AND $-0x4,%R15D |
0x2c9bb NOPL (%RAX,%RAX,1) |
(218) 0x2c9c0 MOVSXD %EDX,%RDX |
(218) 0x2c9c3 VMOVDDUP 0x78(%RDI),%XMM13 |
(218) 0x2c9c8 VMOVAPD (%R14,%RDX,8),%YMM8 |
(218) 0x2c9ce VMOVAPD (%RCX,%RDX,8),%YMM9 |
(218) 0x2c9d3 VMOVAPD (%RSI,%RDX,8),%YMM10 |
(218) 0x2c9d8 VBROADCASTSD %XMM13,%YMM13 |
(218) 0x2c9dd VSUBPD %YMM3,%YMM8,%YMM8 |
(218) 0x2c9e1 VSUBPD %YMM4,%YMM9,%YMM9 |
(218) 0x2c9e5 VMULPD 0x48(%RDI){1to4},%YMM8,%YMM11 |
(218) 0x2c9ec VSUBPD %YMM5,%YMM10,%YMM10 |
(218) 0x2c9f0 VMULPD 0x60(%RDI){1to4},%YMM8,%YMM12 |
(218) 0x2c9f7 VFMADD231PD 0x50(%RDI){1to4},%YMM9,%YMM11 |
(218) 0x2c9fe VFMADD231PD 0x68(%RDI){1to4},%YMM9,%YMM12 |
(218) 0x2ca05 VMULPD %YMM8,%YMM13,%YMM8 |
(218) 0x2ca0a VFMADD231PD 0x80(%RDI){1to4},%YMM9,%YMM8 |
(218) 0x2ca11 VMOVDQA %YMM7,%YMM9 |
(218) 0x2ca15 VFMADD231PD 0x58(%RDI){1to4},%YMM10,%YMM11 |
(218) 0x2ca1c VFMADD231PD 0x70(%RDI){1to4},%YMM10,%YMM12 |
(218) 0x2ca23 VFMADD231PD 0x88(%RDI){1to4},%YMM10,%YMM8 |
(218) 0x2ca2a VMOVDQA %YMM7,%YMM10 |
(218) 0x2ca2e VPTERNLOGQ $-0x8,%YMM6,%YMM11,%YMM9 |
(218) 0x2ca35 VPTERNLOGQ $-0x8,%YMM6,%YMM12,%YMM10 |
(218) 0x2ca3c VADDPD %YMM9,%YMM11,%YMM9 |
(218) 0x2ca41 VADDPD %YMM10,%YMM12,%YMM10 |
(218) 0x2ca46 VROUNDPD $0xb,%YMM9,%YMM9 |
(218) 0x2ca4c VROUNDPD $0xb,%YMM10,%YMM10 |
(218) 0x2ca52 VSUBPD %YMM9,%YMM11,%YMM9 |
(218) 0x2ca57 VMOVDQA %YMM7,%YMM11 |
(218) 0x2ca5b VPTERNLOGQ $-0x8,%YMM6,%YMM8,%YMM11 |
(218) 0x2ca62 VSUBPD %YMM10,%YMM12,%YMM10 |
(218) 0x2ca67 VADDPD %YMM11,%YMM8,%YMM11 |
(218) 0x2ca6c VROUNDPD $0xb,%YMM11,%YMM11 |
(218) 0x2ca72 VSUBPD %YMM11,%YMM8,%YMM8 |
(218) 0x2ca77 VMULPD (%RDI){1to4},%YMM9,%YMM11 |
(218) 0x2ca7d VFMADD231PD 0x8(%RDI){1to4},%YMM10,%YMM11 |
(218) 0x2ca84 VFMADD231PD 0x10(%RDI){1to4},%YMM8,%YMM11 |
(218) 0x2ca8b VMOVAPD %YMM11,(%RBX,%RDX,8) |
(218) 0x2ca90 VMULPD 0x18(%RDI){1to4},%YMM9,%YMM11 |
(218) 0x2ca97 VFMADD231PD 0x20(%RDI){1to4},%YMM10,%YMM11 |
(218) 0x2ca9e VFMADD231PD 0x28(%RDI){1to4},%YMM8,%YMM11 |
(218) 0x2caa5 VMOVAPD %YMM11,(%R11,%RDX,8) |
(218) 0x2caab VMULPD 0x30(%RDI){1to4},%YMM9,%YMM9 |
(218) 0x2cab2 VFMADD231PD 0x38(%RDI){1to4},%YMM10,%YMM9 |
(218) 0x2cab9 VFMADD231PD 0x40(%RDI){1to4},%YMM8,%YMM9 |
(218) 0x2cac0 VMOVAPD %YMM9,(%R10,%RDX,8) |
(218) 0x2cac6 VMOVAPD (%RBX,%RDX,8),%YMM8 |
(218) 0x2cacb VMOVAPD (%R11,%RDX,8),%YMM10 |
(218) 0x2cad1 VMULPD %YMM8,%YMM8,%YMM8 |
(218) 0x2cad6 VFMADD231PD %YMM9,%YMM9,%YMM8 |
(218) 0x2cadb VFMADD231PD %YMM10,%YMM10,%YMM8 |
(218) 0x2cae0 VSQRTPD %YMM8,%YMM8 |
(218) 0x2cae5 VMOVAPD %YMM8,(%R8,%RDX,8) |
(218) 0x2caeb ADD $0x4,%EDX |
(218) 0x2caee CMP %EDX,%R15D |
(218) 0x2caf1 JNE 2c9c0 |
0x2caf7 CMP %EAX,%R15D |
0x2cafa JE 2cc3a |
0x2cb00 VPBROADCASTQ -0x25099(%RIP),%XMM3 |
0x2cb09 VPBROADCASTQ -0x25102(%RIP),%XMM4 |
0x2cb12 NOPW %CS:(%RAX,%RAX,1) |
(217) 0x2cb20 MOVSXD %R15D,%R15 |
(217) 0x2cb23 VMOVSD (%R14,%R15,8),%XMM5 |
(217) 0x2cb29 VMOVSD (%RCX,%R15,8),%XMM6 |
(217) 0x2cb2f VMOVSD (%RSI,%R15,8),%XMM7 |
(217) 0x2cb35 VSUBSD %XMM0,%XMM5,%XMM5 |
(217) 0x2cb39 VSUBSD %XMM1,%XMM6,%XMM6 |
(217) 0x2cb3d VMULSD 0x48(%RDI),%XMM5,%XMM8 |
(217) 0x2cb42 VSUBSD %XMM2,%XMM7,%XMM7 |
(217) 0x2cb46 VMULSD 0x60(%RDI),%XMM5,%XMM9 |
(217) 0x2cb4b VMULSD 0x78(%RDI),%XMM5,%XMM5 |
(217) 0x2cb50 VFMADD231SD 0x50(%RDI),%XMM6,%XMM8 |
(217) 0x2cb56 VFMADD231SD 0x68(%RDI),%XMM6,%XMM9 |
(217) 0x2cb5c VFMADD231SD 0x80(%RDI),%XMM6,%XMM5 |
(217) 0x2cb65 VMOVDQA %XMM4,%XMM6 |
(217) 0x2cb69 VFMADD231SD 0x58(%RDI),%XMM7,%XMM8 |
(217) 0x2cb6f VFMADD231SD 0x70(%RDI),%XMM7,%XMM9 |
(217) 0x2cb75 VFMADD231SD 0x88(%RDI),%XMM7,%XMM5 |
(217) 0x2cb7e VMOVDQA %XMM4,%XMM7 |
(217) 0x2cb82 VPTERNLOGQ $-0x8,%XMM3,%XMM8,%XMM6 |
(217) 0x2cb89 VPTERNLOGQ $-0x8,%XMM3,%XMM9,%XMM7 |
(217) 0x2cb90 VADDSD %XMM6,%XMM8,%XMM6 |
(217) 0x2cb94 VADDSD %XMM7,%XMM9,%XMM7 |
(217) 0x2cb98 VROUNDSD $0xb,%XMM6,%XMM6,%XMM6 |
(217) 0x2cb9e VROUNDSD $0xb,%XMM7,%XMM7,%XMM7 |
(217) 0x2cba4 VSUBSD %XMM6,%XMM8,%XMM6 |
(217) 0x2cba8 VMOVDQA %XMM4,%XMM8 |
(217) 0x2cbac VPTERNLOGQ $-0x8,%XMM3,%XMM5,%XMM8 |
(217) 0x2cbb3 VSUBSD %XMM7,%XMM9,%XMM7 |
(217) 0x2cbb7 VADDSD %XMM5,%XMM8,%XMM8 |
(217) 0x2cbbb VROUNDSD $0xb,%XMM8,%XMM8,%XMM8 |
(217) 0x2cbc1 VSUBSD %XMM8,%XMM5,%XMM5 |
(217) 0x2cbc6 VMULSD (%RDI),%XMM6,%XMM8 |
(217) 0x2cbca VFMADD231SD 0x8(%RDI),%XMM7,%XMM8 |
(217) 0x2cbd0 VFMADD231SD 0x10(%RDI),%XMM5,%XMM8 |
(217) 0x2cbd6 VMOVSD %XMM8,(%RBX,%R15,8) |
(217) 0x2cbdc VMULSD 0x18(%RDI),%XMM6,%XMM8 |
(217) 0x2cbe1 VFMADD231SD 0x20(%RDI),%XMM7,%XMM8 |
(217) 0x2cbe7 VFMADD231SD 0x28(%RDI),%XMM5,%XMM8 |
(217) 0x2cbed VMOVSD %XMM8,(%R11,%R15,8) |
(217) 0x2cbf3 VMULSD 0x30(%RDI),%XMM6,%XMM6 |
(217) 0x2cbf8 VFMADD231SD 0x38(%RDI),%XMM7,%XMM6 |
(217) 0x2cbfe VFMADD231SD 0x40(%RDI),%XMM5,%XMM6 |
(217) 0x2cc04 VMOVSD %XMM6,(%R10,%R15,8) |
(217) 0x2cc0a VMOVSD (%RBX,%R15,8),%XMM5 |
(217) 0x2cc10 VMOVSD (%R11,%R15,8),%XMM7 |
(217) 0x2cc16 VMULSD %XMM5,%XMM5,%XMM5 |
(217) 0x2cc1a VFMADD231SD %XMM6,%XMM6,%XMM5 |
(217) 0x2cc1f VFMADD231SD %XMM7,%XMM7,%XMM5 |
(217) 0x2cc24 VSQRTSD %XMM5,%XMM5,%XMM5 |
(217) 0x2cc28 VMOVSD %XMM5,(%R8,%R15,8) |
(217) 0x2cc2e INC %R15D |
(217) 0x2cc31 CMP %R15D,%EAX |
(217) 0x2cc34 JNE 2cb20 |
0x2cc3a POP %RBX |
0x2cc3b POP %R12 |
0x2cc3d POP %R13 |
0x2cc3f POP %R14 |
0x2cc41 POP %R15 |
0x2cc43 POP %RBP |
0x2cc44 VZEROUPPER |
0x2cc47 RET |
0x2cc48 INT $0x3 |
0x2cc49 INT $0x3 |
0x2cc4a INT $0x3 |
0x2cc4b INT $0x3 |
0x2cc4c INT $0x3 |
0x2cc4d INT $0x3 |
0x2cc4e INT $0x3 |
0x2cc4f INT $0x3 |
Path / |
Source file and lines | ParticleBConds3DSoa.h:221-257 |
Module | libqmcparticle_omptarget.so |
nb instructions | 85 |
nb uops | 74 |
loop length | 332 |
used x86 registers | 14 |
used mmx registers | 0 |
used xmm registers | 5 |
used ymm registers | 5 |
used zmm registers | 5 |
nb stack references | 7 |
micro-operation queue | 12.33 cycles |
front end | 12.33 cycles |
ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 5.00 | 5.00 | 4.75 | 4.75 | 3.50 | 6.33 | 6.33 | 6.33 | 0.00 | 3.00 | 3.00 | 0.00 | 0.00 | 0.00 |
cycles | 5.00 | 5.00 | 4.75 | 4.75 | 3.50 | 6.33 | 6.33 | 6.33 | 0.00 | 3.00 | 3.00 | 0.00 | 0.00 | 0.00 |
Cycles executing div or sqrt instructions | NA |
Front-end | 12.33 |
Dispatch | 6.33 |
Overall L1 | 12.33 |
all | 5% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 8% |
all | 0% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
all | 3% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 5% |
all | 11% |
load | 12% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 11% |
all | 12% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 12% |
all | 12% |
load | 12% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 11% |
Instruction | Nb FU | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV 0x18(%RBP),%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
TEST %EAX,%EAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JLE 2cc3a <_ZNK11qmcplusplus10DTD_BCondsIdLj3ELi40EE16computeDistancesINS_10TinyVectorIdLj3EEENS_18VectorSoAContainerIdLj3ENS_10MallocatorIdLm64EEEEES8_EEvRKT_RKT0_PdRT1_iii+0x4fa> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
VMOVSD (%RSI),%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVSD 0x8(%RSI),%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVSD 0x10(%RSI),%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV 0x10(%RBP),%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %RDX,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R9,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RCX,%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SAL $0x4,%R12 | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
SAL $0x4,%R13 | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
LEA (%RCX,%RDX,8),%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
XOR %R15D,%R15D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
LEA (%R14,%R12,1),%RSI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA (%RBX,%R9,8),%R11 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA (%RBX,%R13,1),%R10 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CMP $0x4,%EAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JB 2cb00 <_ZNK11qmcplusplus10DTD_BCondsIdLj3ELi40EE16computeDistancesINS_10TinyVectorIdLj3EEENS_18VectorSoAContainerIdLj3ENS_10MallocatorIdLm64EEEEES8_EEvRKT_RKT0_PdRT1_iii+0x3c0> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
XOR %R15D,%R15D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
CMP $0x8,%EAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JB 2c990 <_ZNK11qmcplusplus10DTD_BCondsIdLj3ELi40EE16computeDistancesINS_10TinyVectorIdLj3EEENS_18VectorSoAContainerIdLj3ENS_10MallocatorIdLm64EEEEES8_EEvRKT_RKT0_PdRT1_iii+0x250> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
VPBROADCASTQ -0x24d3d(%RIP),%ZMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | 0.50 |
VPBROADCASTQ -0x24da7(%RIP),%ZMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | 0.50 |
MOV %RCX,-0x40(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %EAX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SHR $0x3,%RCX | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VBROADCASTSD %XMM0,%ZMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 1 |
VBROADCASTSD %XMM1,%ZMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 1 |
VBROADCASTSD %XMM2,%ZMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 1 |
MOV %EAX,%R15D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
AND $-0x8,%R15D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %R10,-0x30(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %RDX,-0x50(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %RSI,-0x38(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV %RBX,%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SAL $0x6,%RCX | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %RCX,-0x48(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %R14,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
MOV -0x40(%RBP),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV -0x38(%RBP),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV -0x30(%RBP),%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
CMP %EAX,%R15D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JE 2cc3a <_ZNK11qmcplusplus10DTD_BCondsIdLj3ELi40EE16computeDistancesINS_10TinyVectorIdLj3EEENS_18VectorSoAContainerIdLj3ENS_10MallocatorIdLm64EEEEES8_EEvRKT_RKT0_PdRT1_iii+0x4fa> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
TEST $0x4,%AL | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JE 2cb00 <_ZNK11qmcplusplus10DTD_BCondsIdLj3ELi40EE16computeDistancesINS_10TinyVectorIdLj3EEENS_18VectorSoAContainerIdLj3ENS_10MallocatorIdLm64EEEEES8_EEvRKT_RKT0_PdRT1_iii+0x3c0> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
VPBROADCASTQ -0x24f29(%RIP),%YMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VPBROADCASTQ -0x24f92(%RIP),%YMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VBROADCASTSD %XMM0,%YMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 |
VBROADCASTSD %XMM1,%YMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 |
VBROADCASTSD %XMM2,%YMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 |
MOV %R15D,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %EAX,%R15D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
AND $-0x4,%R15D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
NOPL (%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
CMP %EAX,%R15D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JE 2cc3a <_ZNK11qmcplusplus10DTD_BCondsIdLj3ELi40EE16computeDistancesINS_10TinyVectorIdLj3EEENS_18VectorSoAContainerIdLj3ENS_10MallocatorIdLm64EEEEES8_EEvRKT_RKT0_PdRT1_iii+0x4fa> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
VPBROADCASTQ -0x25099(%RIP),%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VPBROADCASTQ -0x25102(%RIP),%XMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
POP %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VZEROUPPER | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
RET | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 |
Source file and lines | ParticleBConds3DSoa.h:221-257 |
Module | libqmcparticle_omptarget.so |
nb instructions | 85 |
nb uops | 74 |
loop length | 332 |
used x86 registers | 14 |
used mmx registers | 0 |
used xmm registers | 5 |
used ymm registers | 5 |
used zmm registers | 5 |
nb stack references | 7 |
micro-operation queue | 12.33 cycles |
front end | 12.33 cycles |
ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 5.00 | 5.00 | 4.75 | 4.75 | 3.50 | 6.33 | 6.33 | 6.33 | 0.00 | 3.00 | 3.00 | 0.00 | 0.00 | 0.00 |
cycles | 5.00 | 5.00 | 4.75 | 4.75 | 3.50 | 6.33 | 6.33 | 6.33 | 0.00 | 3.00 | 3.00 | 0.00 | 0.00 | 0.00 |
Cycles executing div or sqrt instructions | NA |
Front-end | 12.33 |
Dispatch | 6.33 |
Overall L1 | 12.33 |
all | 5% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 8% |
all | 0% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
all | 3% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 5% |
all | 11% |
load | 12% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 11% |
all | 12% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 12% |
all | 12% |
load | 12% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 11% |
Instruction | Nb FU | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV 0x18(%RBP),%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
TEST %EAX,%EAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JLE 2cc3a <_ZNK11qmcplusplus10DTD_BCondsIdLj3ELi40EE16computeDistancesINS_10TinyVectorIdLj3EEENS_18VectorSoAContainerIdLj3ENS_10MallocatorIdLm64EEEEES8_EEvRKT_RKT0_PdRT1_iii+0x4fa> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
VMOVSD (%RSI),%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVSD 0x8(%RSI),%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVSD 0x10(%RSI),%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV 0x10(%RBP),%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %RDX,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R9,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RCX,%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SAL $0x4,%R12 | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
SAL $0x4,%R13 | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
LEA (%RCX,%RDX,8),%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
XOR %R15D,%R15D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
LEA (%R14,%R12,1),%RSI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA (%RBX,%R9,8),%R11 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA (%RBX,%R13,1),%R10 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CMP $0x4,%EAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JB 2cb00 <_ZNK11qmcplusplus10DTD_BCondsIdLj3ELi40EE16computeDistancesINS_10TinyVectorIdLj3EEENS_18VectorSoAContainerIdLj3ENS_10MallocatorIdLm64EEEEES8_EEvRKT_RKT0_PdRT1_iii+0x3c0> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
XOR %R15D,%R15D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
CMP $0x8,%EAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JB 2c990 <_ZNK11qmcplusplus10DTD_BCondsIdLj3ELi40EE16computeDistancesINS_10TinyVectorIdLj3EEENS_18VectorSoAContainerIdLj3ENS_10MallocatorIdLm64EEEEES8_EEvRKT_RKT0_PdRT1_iii+0x250> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
VPBROADCASTQ -0x24d3d(%RIP),%ZMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | 0.50 |
VPBROADCASTQ -0x24da7(%RIP),%ZMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | 0.50 |
MOV %RCX,-0x40(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %EAX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SHR $0x3,%RCX | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VBROADCASTSD %XMM0,%ZMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 1 |
VBROADCASTSD %XMM1,%ZMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 1 |
VBROADCASTSD %XMM2,%ZMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 1 |
MOV %EAX,%R15D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
AND $-0x8,%R15D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %R10,-0x30(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %RDX,-0x50(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %RSI,-0x38(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV %RBX,%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SAL $0x6,%RCX | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %RCX,-0x48(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %R14,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
MOV -0x40(%RBP),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV -0x38(%RBP),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV -0x30(%RBP),%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
CMP %EAX,%R15D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JE 2cc3a <_ZNK11qmcplusplus10DTD_BCondsIdLj3ELi40EE16computeDistancesINS_10TinyVectorIdLj3EEENS_18VectorSoAContainerIdLj3ENS_10MallocatorIdLm64EEEEES8_EEvRKT_RKT0_PdRT1_iii+0x4fa> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
TEST $0x4,%AL | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JE 2cb00 <_ZNK11qmcplusplus10DTD_BCondsIdLj3ELi40EE16computeDistancesINS_10TinyVectorIdLj3EEENS_18VectorSoAContainerIdLj3ENS_10MallocatorIdLm64EEEEES8_EEvRKT_RKT0_PdRT1_iii+0x3c0> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
VPBROADCASTQ -0x24f29(%RIP),%YMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VPBROADCASTQ -0x24f92(%RIP),%YMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VBROADCASTSD %XMM0,%YMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 |
VBROADCASTSD %XMM1,%YMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 |
VBROADCASTSD %XMM2,%YMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 |
MOV %R15D,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %EAX,%R15D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
AND $-0x4,%R15D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
NOPL (%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
CMP %EAX,%R15D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JE 2cc3a <_ZNK11qmcplusplus10DTD_BCondsIdLj3ELi40EE16computeDistancesINS_10TinyVectorIdLj3EEENS_18VectorSoAContainerIdLj3ENS_10MallocatorIdLm64EEEEES8_EEvRKT_RKT0_PdRT1_iii+0x4fa> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
VPBROADCASTQ -0x25099(%RIP),%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VPBROADCASTQ -0x25102(%RIP),%XMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
POP %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VZEROUPPER | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
RET | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼void qmcplusplus::DTD_BConds | 0.58 | 1.08 |
○Loop 219 - ParticleBConds3DSoa.h:234-255 - libqmcparticle_omptarget.so | 0.58 | 1.06 |
○Loop 217 - ParticleBConds3DSoa.h:234-255 - libqmcparticle_omptarget.so | 0 | 0.01 |
○Loop 218 - ParticleBConds3DSoa.h:234-255 - libqmcparticle_omptarget.so | 0 | 0 |