Function: _ZNK11qmcplusplus10DTD_BCondsIdLj3ELi40EE16computeDistancesINS_10TinyVectorIdLj3EEENS_18Ve ... | Module: libqmcparticle_omptarget.so | Source: ParticleBConds3DSoa.h:221-257 [...] | Coverage: 0.71% |
---|
Function: _ZNK11qmcplusplus10DTD_BCondsIdLj3ELi40EE16computeDistancesINS_10TinyVectorIdLj3EEENS_18Ve ... | Module: libqmcparticle_omptarget.so | Source: ParticleBConds3DSoa.h:221-257 [...] | Coverage: 0.71% |
---|
/beegfs/hackathon/users/eoseret/qaas_runs/170-855-3059/intel/miniqmc/build/miniqmc/src/Particle/Lattice/ParticleBConds3DSoa.h: 221 - 257 |
-------------------------------------------------------------------------------- |
221: { |
[...] |
234: #pragma omp simd aligned(temp_r, px, py, pz, dx, dy, dz: QMC_SIMD_ALIGNMENT) |
235: for (int iat = first; iat < last; ++iat) |
236: { |
237: T displ_0 = px[iat] - x0; |
238: T displ_1 = py[iat] - y0; |
239: T displ_2 = pz[iat] - z0; |
240: |
241: T ar_0 = displ_0 * g00 + displ_1 * g10 + displ_2 * g20; |
242: T ar_1 = displ_0 * g01 + displ_1 * g11 + displ_2 * g21; |
243: T ar_2 = displ_0 * g02 + displ_1 * g12 + displ_2 * g22; |
244: |
245: //put them in the box |
246: ar_0 -= round(ar_0); |
247: ar_1 -= round(ar_1); |
248: ar_2 -= round(ar_2); |
249: |
250: //unit2cart |
251: dx[iat] = ar_0 * r00 + ar_1 * r10 + ar_2 * r20; |
252: dy[iat] = ar_0 * r01 + ar_1 * r11 + ar_2 * r21; |
253: dz[iat] = ar_0 * r02 + ar_1 * r12 + ar_2 * r22; |
254: |
255: temp_r[iat] = std::sqrt(dx[iat] * dx[iat] + dy[iat] * dy[iat] + dz[iat] * dz[iat]); |
256: } |
257: } |
0x2c840 PUSH %RBP |
0x2c841 MOV %RSP,%RBP |
0x2c844 PUSH %R15 |
0x2c846 PUSH %R14 |
0x2c848 PUSH %R13 |
0x2c84a PUSH %R12 |
0x2c84c PUSH %RBX |
0x2c84d MOV 0x18(%RBP),%EAX |
0x2c850 TEST %EAX,%EAX |
0x2c852 JLE 2cd3a |
0x2c858 VMOVSD (%RSI),%XMM0 |
0x2c85c VMOVSD 0x8(%RSI),%XMM1 |
0x2c861 VMOVSD 0x10(%RSI),%XMM2 |
0x2c866 MOV 0x10(%RBP),%RBX |
0x2c86a MOV %RDX,%R12 |
0x2c86d MOV %R9,%R13 |
0x2c870 MOV %RCX,%R14 |
0x2c873 SAL $0x4,%R12 |
0x2c877 SAL $0x4,%R13 |
0x2c87b LEA (%RCX,%RDX,8),%RCX |
0x2c87f XOR %R15D,%R15D |
0x2c882 LEA (%R14,%R12,1),%RSI |
0x2c886 LEA (%RBX,%R9,8),%R11 |
0x2c88a LEA (%RBX,%R13,1),%R10 |
0x2c88e CMP $0x4,%EAX |
0x2c891 JB 2cc00 |
0x2c897 XOR %R15D,%R15D |
0x2c89a CMP $0x8,%EAX |
0x2c89d JB 2ca90 |
0x2c8a3 VPBROADCASTQ -0x24d3d(%RIP),%ZMM6 |
0x2c8ad VPBROADCASTQ -0x24da7(%RIP),%ZMM7 |
0x2c8b7 MOV %RCX,-0x40(%RBP) |
0x2c8bb MOV %EAX,%ECX |
0x2c8bd SHR $0x3,%RCX |
0x2c8c1 VBROADCASTSD %XMM0,%ZMM3 |
0x2c8c7 VBROADCASTSD %XMM1,%ZMM4 |
0x2c8cd VBROADCASTSD %XMM2,%ZMM5 |
0x2c8d3 MOV %EAX,%R15D |
0x2c8d6 AND $-0x8,%R15D |
0x2c8da MOV %R10,-0x30(%RBP) |
0x2c8de MOV %RDX,-0x50(%RBP) |
0x2c8e2 MOV %RSI,-0x38(%RBP) |
0x2c8e6 XOR %EDX,%EDX |
0x2c8e8 MOV %RBX,%R10 |
0x2c8eb SAL $0x6,%RCX |
0x2c8ef MOV %RCX,-0x48(%RBP) |
0x2c8f3 MOV %R14,%RCX |
0x2c8f6 NOPW %CS:(%RAX,%RAX,1) |
(219) 0x2c900 MOV %R13,%RSI |
(219) 0x2c903 MOV %R12,%R13 |
(219) 0x2c906 MOV -0x50(%RBP),%R12 |
(219) 0x2c90a VMOVAPD (%RCX),%ZMM8 |
(219) 0x2c910 VMOVDDUP 0x78(%RDI),%XMM13 |
(219) 0x2c915 VMOVAPD (%RCX,%R12,8),%ZMM9 |
(219) 0x2c91c MOV %R13,%R12 |
(219) 0x2c91f VMOVAPD (%RCX,%R12,1),%ZMM10 |
(219) 0x2c926 ADD $0x40,%RCX |
(219) 0x2c92a MOV %RSI,%R13 |
(219) 0x2c92d VSUBPD %ZMM3,%ZMM8,%ZMM8 |
(219) 0x2c933 VMULPD 0x48(%RDI){1to8},%ZMM8,%ZMM11 |
(219) 0x2c93a VMULPD 0x60(%RDI){1to8},%ZMM8,%ZMM12 |
(219) 0x2c941 VBROADCASTSD %XMM13,%ZMM13 |
(219) 0x2c947 VMULPD %ZMM8,%ZMM13,%ZMM8 |
(219) 0x2c94d VMOVDQA64 %ZMM7,%ZMM13 |
(219) 0x2c953 VSUBPD %ZMM4,%ZMM9,%ZMM9 |
(219) 0x2c959 VSUBPD %ZMM5,%ZMM10,%ZMM10 |
(219) 0x2c95f VFMADD231PD 0x50(%RDI){1to8},%ZMM9,%ZMM11 |
(219) 0x2c966 VFMADD231PD 0x68(%RDI){1to8},%ZMM9,%ZMM12 |
(219) 0x2c96d VFMADD231PD 0x80(%RDI){1to8},%ZMM9,%ZMM8 |
(219) 0x2c974 VMOVDQA64 %ZMM7,%ZMM9 |
(219) 0x2c97a VFMADD231PD 0x58(%RDI){1to8},%ZMM10,%ZMM11 |
(219) 0x2c981 VFMADD231PD 0x70(%RDI){1to8},%ZMM10,%ZMM12 |
(219) 0x2c988 VFMADD231PD 0x88(%RDI){1to8},%ZMM10,%ZMM8 |
(219) 0x2c98f VMOVDQA64 %ZMM7,%ZMM10 |
(219) 0x2c995 VPTERNLOGQ $-0x8,%ZMM6,%ZMM11,%ZMM9 |
(219) 0x2c99c VPTERNLOGQ $-0x8,%ZMM6,%ZMM12,%ZMM10 |
(219) 0x2c9a3 VPTERNLOGQ $-0x8,%ZMM6,%ZMM8,%ZMM13 |
(219) 0x2c9aa VADDPD %ZMM9,%ZMM11,%ZMM9 |
(219) 0x2c9b0 VADDPD %ZMM10,%ZMM12,%ZMM10 |
(219) 0x2c9b6 VRNDSCALEPD $0xb,%ZMM9,%ZMM9 |
(219) 0x2c9bd VRNDSCALEPD $0xb,%ZMM10,%ZMM10 |
(219) 0x2c9c4 VSUBPD %ZMM9,%ZMM11,%ZMM9 |
(219) 0x2c9ca VADDPD %ZMM13,%ZMM8,%ZMM11 |
(219) 0x2c9d0 VSUBPD %ZMM10,%ZMM12,%ZMM10 |
(219) 0x2c9d6 VMULPD (%RDI){1to8},%ZMM9,%ZMM12 |
(219) 0x2c9dc VRNDSCALEPD $0xb,%ZMM11,%ZMM11 |
(219) 0x2c9e3 VFMADD231PD 0x8(%RDI){1to8},%ZMM10,%ZMM12 |
(219) 0x2c9ea VSUBPD %ZMM11,%ZMM8,%ZMM8 |
(219) 0x2c9f0 VFMADD231PD 0x10(%RDI){1to8},%ZMM8,%ZMM12 |
(219) 0x2c9f7 VMOVAPD %ZMM12,(%R10) |
(219) 0x2c9fd VMULPD 0x18(%RDI){1to8},%ZMM9,%ZMM11 |
(219) 0x2ca04 VFMADD231PD 0x20(%RDI){1to8},%ZMM10,%ZMM11 |
(219) 0x2ca0b VFMADD231PD 0x28(%RDI){1to8},%ZMM8,%ZMM11 |
(219) 0x2ca12 VMOVAPD %ZMM11,(%R10,%R9,8) |
(219) 0x2ca19 VMULPD 0x30(%RDI){1to8},%ZMM9,%ZMM9 |
(219) 0x2ca20 VFMADD231PD 0x38(%RDI){1to8},%ZMM10,%ZMM9 |
(219) 0x2ca27 VFMADD231PD 0x40(%RDI){1to8},%ZMM8,%ZMM9 |
(219) 0x2ca2e VMOVAPD %ZMM9,(%R10,%RSI,1) |
(219) 0x2ca35 VMOVAPD (%R10),%ZMM8 |
(219) 0x2ca3b VMOVAPD (%R10,%R9,8),%ZMM10 |
(219) 0x2ca42 ADD $0x40,%R10 |
(219) 0x2ca46 VMULPD %ZMM8,%ZMM8,%ZMM8 |
(219) 0x2ca4c VFMADD231PD %ZMM9,%ZMM9,%ZMM8 |
(219) 0x2ca52 VFMADD231PD %ZMM10,%ZMM10,%ZMM8 |
(219) 0x2ca58 VSQRTPD %ZMM8,%ZMM8 |
(219) 0x2ca5e VMOVAPD %ZMM8,(%R8,%RDX,1) |
(219) 0x2ca65 ADD $0x40,%RDX |
(219) 0x2ca69 CMP %RDX,-0x48(%RBP) |
(219) 0x2ca6d JNE 2c900 |
0x2ca73 MOV -0x40(%RBP),%RCX |
0x2ca77 MOV -0x38(%RBP),%RSI |
0x2ca7b MOV -0x30(%RBP),%R10 |
0x2ca7f CMP %EAX,%R15D |
0x2ca82 JE 2cd3a |
0x2ca88 TEST $0x4,%AL |
0x2ca8a JE 2cc00 |
0x2ca90 VPBROADCASTQ -0x24f29(%RIP),%YMM6 |
0x2ca99 VPBROADCASTQ -0x24f92(%RIP),%YMM7 |
0x2caa2 VBROADCASTSD %XMM0,%YMM3 |
0x2caa7 VBROADCASTSD %XMM1,%YMM4 |
0x2caac VBROADCASTSD %XMM2,%YMM5 |
0x2cab1 MOV %R15D,%EDX |
0x2cab4 MOV %EAX,%R15D |
0x2cab7 AND $-0x4,%R15D |
0x2cabb NOPL (%RAX,%RAX,1) |
(218) 0x2cac0 MOVSXD %EDX,%RDX |
(218) 0x2cac3 VMOVDDUP 0x78(%RDI),%XMM13 |
(218) 0x2cac8 VMOVAPD (%R14,%RDX,8),%YMM8 |
(218) 0x2cace VMOVAPD (%RCX,%RDX,8),%YMM9 |
(218) 0x2cad3 VMOVAPD (%RSI,%RDX,8),%YMM10 |
(218) 0x2cad8 VBROADCASTSD %XMM13,%YMM13 |
(218) 0x2cadd VSUBPD %YMM3,%YMM8,%YMM8 |
(218) 0x2cae1 VSUBPD %YMM4,%YMM9,%YMM9 |
(218) 0x2cae5 VMULPD 0x48(%RDI){1to4},%YMM8,%YMM11 |
(218) 0x2caec VSUBPD %YMM5,%YMM10,%YMM10 |
(218) 0x2caf0 VMULPD 0x60(%RDI){1to4},%YMM8,%YMM12 |
(218) 0x2caf7 VFMADD231PD 0x50(%RDI){1to4},%YMM9,%YMM11 |
(218) 0x2cafe VFMADD231PD 0x68(%RDI){1to4},%YMM9,%YMM12 |
(218) 0x2cb05 VMULPD %YMM8,%YMM13,%YMM8 |
(218) 0x2cb0a VFMADD231PD 0x80(%RDI){1to4},%YMM9,%YMM8 |
(218) 0x2cb11 VMOVDQA %YMM7,%YMM9 |
(218) 0x2cb15 VFMADD231PD 0x58(%RDI){1to4},%YMM10,%YMM11 |
(218) 0x2cb1c VFMADD231PD 0x70(%RDI){1to4},%YMM10,%YMM12 |
(218) 0x2cb23 VFMADD231PD 0x88(%RDI){1to4},%YMM10,%YMM8 |
(218) 0x2cb2a VMOVDQA %YMM7,%YMM10 |
(218) 0x2cb2e VPTERNLOGQ $-0x8,%YMM6,%YMM11,%YMM9 |
(218) 0x2cb35 VPTERNLOGQ $-0x8,%YMM6,%YMM12,%YMM10 |
(218) 0x2cb3c VADDPD %YMM9,%YMM11,%YMM9 |
(218) 0x2cb41 VADDPD %YMM10,%YMM12,%YMM10 |
(218) 0x2cb46 VROUNDPD $0xb,%YMM9,%YMM9 |
(218) 0x2cb4c VROUNDPD $0xb,%YMM10,%YMM10 |
(218) 0x2cb52 VSUBPD %YMM9,%YMM11,%YMM9 |
(218) 0x2cb57 VMOVDQA %YMM7,%YMM11 |
(218) 0x2cb5b VPTERNLOGQ $-0x8,%YMM6,%YMM8,%YMM11 |
(218) 0x2cb62 VSUBPD %YMM10,%YMM12,%YMM10 |
(218) 0x2cb67 VADDPD %YMM11,%YMM8,%YMM11 |
(218) 0x2cb6c VROUNDPD $0xb,%YMM11,%YMM11 |
(218) 0x2cb72 VSUBPD %YMM11,%YMM8,%YMM8 |
(218) 0x2cb77 VMULPD (%RDI){1to4},%YMM9,%YMM11 |
(218) 0x2cb7d VFMADD231PD 0x8(%RDI){1to4},%YMM10,%YMM11 |
(218) 0x2cb84 VFMADD231PD 0x10(%RDI){1to4},%YMM8,%YMM11 |
(218) 0x2cb8b VMOVAPD %YMM11,(%RBX,%RDX,8) |
(218) 0x2cb90 VMULPD 0x18(%RDI){1to4},%YMM9,%YMM11 |
(218) 0x2cb97 VFMADD231PD 0x20(%RDI){1to4},%YMM10,%YMM11 |
(218) 0x2cb9e VFMADD231PD 0x28(%RDI){1to4},%YMM8,%YMM11 |
(218) 0x2cba5 VMOVAPD %YMM11,(%R11,%RDX,8) |
(218) 0x2cbab VMULPD 0x30(%RDI){1to4},%YMM9,%YMM9 |
(218) 0x2cbb2 VFMADD231PD 0x38(%RDI){1to4},%YMM10,%YMM9 |
(218) 0x2cbb9 VFMADD231PD 0x40(%RDI){1to4},%YMM8,%YMM9 |
(218) 0x2cbc0 VMOVAPD %YMM9,(%R10,%RDX,8) |
(218) 0x2cbc6 VMOVAPD (%RBX,%RDX,8),%YMM8 |
(218) 0x2cbcb VMOVAPD (%R11,%RDX,8),%YMM10 |
(218) 0x2cbd1 VMULPD %YMM8,%YMM8,%YMM8 |
(218) 0x2cbd6 VFMADD231PD %YMM9,%YMM9,%YMM8 |
(218) 0x2cbdb VFMADD231PD %YMM10,%YMM10,%YMM8 |
(218) 0x2cbe0 VSQRTPD %YMM8,%YMM8 |
(218) 0x2cbe5 VMOVAPD %YMM8,(%R8,%RDX,8) |
(218) 0x2cbeb ADD $0x4,%EDX |
(218) 0x2cbee CMP %EDX,%R15D |
(218) 0x2cbf1 JNE 2cac0 |
0x2cbf7 CMP %EAX,%R15D |
0x2cbfa JE 2cd3a |
0x2cc00 VPBROADCASTQ -0x25099(%RIP),%XMM3 |
0x2cc09 VPBROADCASTQ -0x25102(%RIP),%XMM4 |
0x2cc12 NOPW %CS:(%RAX,%RAX,1) |
(217) 0x2cc20 MOVSXD %R15D,%R15 |
(217) 0x2cc23 VMOVSD (%R14,%R15,8),%XMM5 |
(217) 0x2cc29 VMOVSD (%RCX,%R15,8),%XMM6 |
(217) 0x2cc2f VMOVSD (%RSI,%R15,8),%XMM7 |
(217) 0x2cc35 VSUBSD %XMM0,%XMM5,%XMM5 |
(217) 0x2cc39 VSUBSD %XMM1,%XMM6,%XMM6 |
(217) 0x2cc3d VMULSD 0x48(%RDI),%XMM5,%XMM8 |
(217) 0x2cc42 VSUBSD %XMM2,%XMM7,%XMM7 |
(217) 0x2cc46 VMULSD 0x60(%RDI),%XMM5,%XMM9 |
(217) 0x2cc4b VMULSD 0x78(%RDI),%XMM5,%XMM5 |
(217) 0x2cc50 VFMADD231SD 0x50(%RDI),%XMM6,%XMM8 |
(217) 0x2cc56 VFMADD231SD 0x68(%RDI),%XMM6,%XMM9 |
(217) 0x2cc5c VFMADD231SD 0x80(%RDI),%XMM6,%XMM5 |
(217) 0x2cc65 VMOVDQA %XMM4,%XMM6 |
(217) 0x2cc69 VFMADD231SD 0x58(%RDI),%XMM7,%XMM8 |
(217) 0x2cc6f VFMADD231SD 0x70(%RDI),%XMM7,%XMM9 |
(217) 0x2cc75 VFMADD231SD 0x88(%RDI),%XMM7,%XMM5 |
(217) 0x2cc7e VMOVDQA %XMM4,%XMM7 |
(217) 0x2cc82 VPTERNLOGQ $-0x8,%XMM3,%XMM8,%XMM6 |
(217) 0x2cc89 VPTERNLOGQ $-0x8,%XMM3,%XMM9,%XMM7 |
(217) 0x2cc90 VADDSD %XMM6,%XMM8,%XMM6 |
(217) 0x2cc94 VADDSD %XMM7,%XMM9,%XMM7 |
(217) 0x2cc98 VROUNDSD $0xb,%XMM6,%XMM6,%XMM6 |
(217) 0x2cc9e VROUNDSD $0xb,%XMM7,%XMM7,%XMM7 |
(217) 0x2cca4 VSUBSD %XMM6,%XMM8,%XMM6 |
(217) 0x2cca8 VMOVDQA %XMM4,%XMM8 |
(217) 0x2ccac VPTERNLOGQ $-0x8,%XMM3,%XMM5,%XMM8 |
(217) 0x2ccb3 VSUBSD %XMM7,%XMM9,%XMM7 |
(217) 0x2ccb7 VADDSD %XMM5,%XMM8,%XMM8 |
(217) 0x2ccbb VROUNDSD $0xb,%XMM8,%XMM8,%XMM8 |
(217) 0x2ccc1 VSUBSD %XMM8,%XMM5,%XMM5 |
(217) 0x2ccc6 VMULSD (%RDI),%XMM6,%XMM8 |
(217) 0x2ccca VFMADD231SD 0x8(%RDI),%XMM7,%XMM8 |
(217) 0x2ccd0 VFMADD231SD 0x10(%RDI),%XMM5,%XMM8 |
(217) 0x2ccd6 VMOVSD %XMM8,(%RBX,%R15,8) |
(217) 0x2ccdc VMULSD 0x18(%RDI),%XMM6,%XMM8 |
(217) 0x2cce1 VFMADD231SD 0x20(%RDI),%XMM7,%XMM8 |
(217) 0x2cce7 VFMADD231SD 0x28(%RDI),%XMM5,%XMM8 |
(217) 0x2cced VMOVSD %XMM8,(%R11,%R15,8) |
(217) 0x2ccf3 VMULSD 0x30(%RDI),%XMM6,%XMM6 |
(217) 0x2ccf8 VFMADD231SD 0x38(%RDI),%XMM7,%XMM6 |
(217) 0x2ccfe VFMADD231SD 0x40(%RDI),%XMM5,%XMM6 |
(217) 0x2cd04 VMOVSD %XMM6,(%R10,%R15,8) |
(217) 0x2cd0a VMOVSD (%RBX,%R15,8),%XMM5 |
(217) 0x2cd10 VMOVSD (%R11,%R15,8),%XMM7 |
(217) 0x2cd16 VMULSD %XMM5,%XMM5,%XMM5 |
(217) 0x2cd1a VFMADD231SD %XMM6,%XMM6,%XMM5 |
(217) 0x2cd1f VFMADD231SD %XMM7,%XMM7,%XMM5 |
(217) 0x2cd24 VSQRTSD %XMM5,%XMM5,%XMM5 |
(217) 0x2cd28 VMOVSD %XMM5,(%R8,%R15,8) |
(217) 0x2cd2e INC %R15D |
(217) 0x2cd31 CMP %R15D,%EAX |
(217) 0x2cd34 JNE 2cc20 |
0x2cd3a POP %RBX |
0x2cd3b POP %R12 |
0x2cd3d POP %R13 |
0x2cd3f POP %R14 |
0x2cd41 POP %R15 |
0x2cd43 POP %RBP |
0x2cd44 VZEROUPPER |
0x2cd47 RET |
0x2cd48 INT $0x3 |
0x2cd49 INT $0x3 |
0x2cd4a INT $0x3 |
0x2cd4b INT $0x3 |
0x2cd4c INT $0x3 |
0x2cd4d INT $0x3 |
0x2cd4e INT $0x3 |
0x2cd4f INT $0x3 |
Path / |
Source file and lines | ParticleBConds3DSoa.h:221-257 |
Module | libqmcparticle_omptarget.so |
nb instructions | 85 |
nb uops | 74 |
loop length | 332 |
used x86 registers | 14 |
used mmx registers | 0 |
used xmm registers | 5 |
used ymm registers | 5 |
used zmm registers | 5 |
nb stack references | 7 |
micro-operation queue | 12.33 cycles |
front end | 12.33 cycles |
ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 5.00 | 5.00 | 4.75 | 4.75 | 3.50 | 6.33 | 6.33 | 6.33 | 0.00 | 3.00 | 3.00 | 0.00 | 0.00 | 0.00 |
cycles | 5.00 | 5.00 | 4.75 | 4.75 | 3.50 | 6.33 | 6.33 | 6.33 | 0.00 | 3.00 | 3.00 | 0.00 | 0.00 | 0.00 |
Cycles executing div or sqrt instructions | NA |
Front-end | 12.33 |
Dispatch | 6.33 |
Overall L1 | 12.33 |
all | 5% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 8% |
all | 0% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
all | 3% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 5% |
all | 11% |
load | 12% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 11% |
all | 12% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 12% |
all | 12% |
load | 12% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 11% |
Instruction | Nb FU | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV 0x18(%RBP),%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
TEST %EAX,%EAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JLE 2cd3a <_ZNK11qmcplusplus10DTD_BCondsIdLj3ELi40EE16computeDistancesINS_10TinyVectorIdLj3EEENS_18VectorSoAContainerIdLj3ENS_10MallocatorIdLm64EEEEES8_EEvRKT_RKT0_PdRT1_iii+0x4fa> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
VMOVSD (%RSI),%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVSD 0x8(%RSI),%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVSD 0x10(%RSI),%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV 0x10(%RBP),%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %RDX,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R9,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RCX,%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SAL $0x4,%R12 | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
SAL $0x4,%R13 | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
LEA (%RCX,%RDX,8),%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
XOR %R15D,%R15D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
LEA (%R14,%R12,1),%RSI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA (%RBX,%R9,8),%R11 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA (%RBX,%R13,1),%R10 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CMP $0x4,%EAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JB 2cc00 <_ZNK11qmcplusplus10DTD_BCondsIdLj3ELi40EE16computeDistancesINS_10TinyVectorIdLj3EEENS_18VectorSoAContainerIdLj3ENS_10MallocatorIdLm64EEEEES8_EEvRKT_RKT0_PdRT1_iii+0x3c0> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
XOR %R15D,%R15D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
CMP $0x8,%EAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JB 2ca90 <_ZNK11qmcplusplus10DTD_BCondsIdLj3ELi40EE16computeDistancesINS_10TinyVectorIdLj3EEENS_18VectorSoAContainerIdLj3ENS_10MallocatorIdLm64EEEEES8_EEvRKT_RKT0_PdRT1_iii+0x250> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
VPBROADCASTQ -0x24d3d(%RIP),%ZMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | 0.50 |
VPBROADCASTQ -0x24da7(%RIP),%ZMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | 0.50 |
MOV %RCX,-0x40(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %EAX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SHR $0x3,%RCX | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VBROADCASTSD %XMM0,%ZMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 1 |
VBROADCASTSD %XMM1,%ZMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 1 |
VBROADCASTSD %XMM2,%ZMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 1 |
MOV %EAX,%R15D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
AND $-0x8,%R15D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %R10,-0x30(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %RDX,-0x50(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %RSI,-0x38(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV %RBX,%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SAL $0x6,%RCX | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %RCX,-0x48(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %R14,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
MOV -0x40(%RBP),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV -0x38(%RBP),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV -0x30(%RBP),%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
CMP %EAX,%R15D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JE 2cd3a <_ZNK11qmcplusplus10DTD_BCondsIdLj3ELi40EE16computeDistancesINS_10TinyVectorIdLj3EEENS_18VectorSoAContainerIdLj3ENS_10MallocatorIdLm64EEEEES8_EEvRKT_RKT0_PdRT1_iii+0x4fa> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
TEST $0x4,%AL | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JE 2cc00 <_ZNK11qmcplusplus10DTD_BCondsIdLj3ELi40EE16computeDistancesINS_10TinyVectorIdLj3EEENS_18VectorSoAContainerIdLj3ENS_10MallocatorIdLm64EEEEES8_EEvRKT_RKT0_PdRT1_iii+0x3c0> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
VPBROADCASTQ -0x24f29(%RIP),%YMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VPBROADCASTQ -0x24f92(%RIP),%YMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VBROADCASTSD %XMM0,%YMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 |
VBROADCASTSD %XMM1,%YMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 |
VBROADCASTSD %XMM2,%YMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 |
MOV %R15D,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %EAX,%R15D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
AND $-0x4,%R15D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
NOPL (%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
CMP %EAX,%R15D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JE 2cd3a <_ZNK11qmcplusplus10DTD_BCondsIdLj3ELi40EE16computeDistancesINS_10TinyVectorIdLj3EEENS_18VectorSoAContainerIdLj3ENS_10MallocatorIdLm64EEEEES8_EEvRKT_RKT0_PdRT1_iii+0x4fa> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
VPBROADCASTQ -0x25099(%RIP),%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VPBROADCASTQ -0x25102(%RIP),%XMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
POP %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VZEROUPPER | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
RET | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 |
Source file and lines | ParticleBConds3DSoa.h:221-257 |
Module | libqmcparticle_omptarget.so |
nb instructions | 85 |
nb uops | 74 |
loop length | 332 |
used x86 registers | 14 |
used mmx registers | 0 |
used xmm registers | 5 |
used ymm registers | 5 |
used zmm registers | 5 |
nb stack references | 7 |
micro-operation queue | 12.33 cycles |
front end | 12.33 cycles |
ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 5.00 | 5.00 | 4.75 | 4.75 | 3.50 | 6.33 | 6.33 | 6.33 | 0.00 | 3.00 | 3.00 | 0.00 | 0.00 | 0.00 |
cycles | 5.00 | 5.00 | 4.75 | 4.75 | 3.50 | 6.33 | 6.33 | 6.33 | 0.00 | 3.00 | 3.00 | 0.00 | 0.00 | 0.00 |
Cycles executing div or sqrt instructions | NA |
Front-end | 12.33 |
Dispatch | 6.33 |
Overall L1 | 12.33 |
all | 5% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 8% |
all | 0% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
all | 3% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 5% |
all | 11% |
load | 12% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 11% |
all | 12% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 12% |
all | 12% |
load | 12% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 11% |
Instruction | Nb FU | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV 0x18(%RBP),%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
TEST %EAX,%EAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JLE 2cd3a <_ZNK11qmcplusplus10DTD_BCondsIdLj3ELi40EE16computeDistancesINS_10TinyVectorIdLj3EEENS_18VectorSoAContainerIdLj3ENS_10MallocatorIdLm64EEEEES8_EEvRKT_RKT0_PdRT1_iii+0x4fa> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
VMOVSD (%RSI),%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVSD 0x8(%RSI),%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVSD 0x10(%RSI),%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV 0x10(%RBP),%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %RDX,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R9,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RCX,%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SAL $0x4,%R12 | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
SAL $0x4,%R13 | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
LEA (%RCX,%RDX,8),%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
XOR %R15D,%R15D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
LEA (%R14,%R12,1),%RSI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA (%RBX,%R9,8),%R11 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA (%RBX,%R13,1),%R10 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CMP $0x4,%EAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JB 2cc00 <_ZNK11qmcplusplus10DTD_BCondsIdLj3ELi40EE16computeDistancesINS_10TinyVectorIdLj3EEENS_18VectorSoAContainerIdLj3ENS_10MallocatorIdLm64EEEEES8_EEvRKT_RKT0_PdRT1_iii+0x3c0> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
XOR %R15D,%R15D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
CMP $0x8,%EAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JB 2ca90 <_ZNK11qmcplusplus10DTD_BCondsIdLj3ELi40EE16computeDistancesINS_10TinyVectorIdLj3EEENS_18VectorSoAContainerIdLj3ENS_10MallocatorIdLm64EEEEES8_EEvRKT_RKT0_PdRT1_iii+0x250> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
VPBROADCASTQ -0x24d3d(%RIP),%ZMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | 0.50 |
VPBROADCASTQ -0x24da7(%RIP),%ZMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | 0.50 |
MOV %RCX,-0x40(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %EAX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SHR $0x3,%RCX | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VBROADCASTSD %XMM0,%ZMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 1 |
VBROADCASTSD %XMM1,%ZMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 1 |
VBROADCASTSD %XMM2,%ZMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 1 |
MOV %EAX,%R15D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
AND $-0x8,%R15D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %R10,-0x30(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %RDX,-0x50(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %RSI,-0x38(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV %RBX,%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SAL $0x6,%RCX | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %RCX,-0x48(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %R14,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
MOV -0x40(%RBP),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV -0x38(%RBP),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV -0x30(%RBP),%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
CMP %EAX,%R15D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JE 2cd3a <_ZNK11qmcplusplus10DTD_BCondsIdLj3ELi40EE16computeDistancesINS_10TinyVectorIdLj3EEENS_18VectorSoAContainerIdLj3ENS_10MallocatorIdLm64EEEEES8_EEvRKT_RKT0_PdRT1_iii+0x4fa> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
TEST $0x4,%AL | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JE 2cc00 <_ZNK11qmcplusplus10DTD_BCondsIdLj3ELi40EE16computeDistancesINS_10TinyVectorIdLj3EEENS_18VectorSoAContainerIdLj3ENS_10MallocatorIdLm64EEEEES8_EEvRKT_RKT0_PdRT1_iii+0x3c0> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
VPBROADCASTQ -0x24f29(%RIP),%YMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VPBROADCASTQ -0x24f92(%RIP),%YMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VBROADCASTSD %XMM0,%YMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 |
VBROADCASTSD %XMM1,%YMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 |
VBROADCASTSD %XMM2,%YMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 |
MOV %R15D,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %EAX,%R15D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
AND $-0x4,%R15D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
NOPL (%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
CMP %EAX,%R15D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JE 2cd3a <_ZNK11qmcplusplus10DTD_BCondsIdLj3ELi40EE16computeDistancesINS_10TinyVectorIdLj3EEENS_18VectorSoAContainerIdLj3ENS_10MallocatorIdLm64EEEEES8_EEvRKT_RKT0_PdRT1_iii+0x4fa> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
VPBROADCASTQ -0x25099(%RIP),%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VPBROADCASTQ -0x25102(%RIP),%XMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
POP %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VZEROUPPER | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
RET | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼_ZNK11qmcplusplus10DTD_BCondsIdLj3ELi40EE16computeDistancesINS_10TinyVectorIdLj3EEENS_18VectorSoAContainerIdLj3ENS_10MallocatorIdLm64EEEEES8_EEvRKT_RKT0_PdRT1_iii– | 0.71 | 1.44 |
○Loop 219 - ParticleBConds3DSoa.h:234-255 - libqmcparticle_omptarget.so | 0.7 | 1.32 |
○Loop 217 - ParticleBConds3DSoa.h:234-255 - libqmcparticle_omptarget.so | 0 | 0 |
○Loop 218 - ParticleBConds3DSoa.h:234-255 - libqmcparticle_omptarget.so | 0 | 0 |