Function: void qmcplusplus::DTD_BConds<double, 3u, 40>::computeDistances<qmcplusplus::TinyVector<dou ... | Module: exec | Source: ParticleBConds3DSoa.h:221-257 [...] | Coverage: 0.59% |
---|
Function: void qmcplusplus::DTD_BConds<double, 3u, 40>::computeDistances<qmcplusplus::TinyVector<dou ... | Module: exec | Source: ParticleBConds3DSoa.h:221-257 [...] | Coverage: 0.59% |
---|
/home/kcamus/qaas_runs/170-254-9426/intel/miniqmc/build/miniqmc/src/Particle/Lattice/ParticleBConds3DSoa.h: 221 - 257 |
-------------------------------------------------------------------------------- |
221: { |
[...] |
234: #pragma omp simd aligned(temp_r, px, py, pz, dx, dy, dz: QMC_SIMD_ALIGNMENT) |
235: for (int iat = first; iat < last; ++iat) |
236: { |
237: T displ_0 = px[iat] - x0; |
238: T displ_1 = py[iat] - y0; |
239: T displ_2 = pz[iat] - z0; |
240: |
241: T ar_0 = displ_0 * g00 + displ_1 * g10 + displ_2 * g20; |
242: T ar_1 = displ_0 * g01 + displ_1 * g11 + displ_2 * g21; |
243: T ar_2 = displ_0 * g02 + displ_1 * g12 + displ_2 * g22; |
244: |
245: //put them in the box |
246: ar_0 -= round(ar_0); |
247: ar_1 -= round(ar_1); |
248: ar_2 -= round(ar_2); |
249: |
250: //unit2cart |
251: dx[iat] = ar_0 * r00 + ar_1 * r10 + ar_2 * r20; |
252: dy[iat] = ar_0 * r01 + ar_1 * r11 + ar_2 * r21; |
253: dz[iat] = ar_0 * r02 + ar_1 * r12 + ar_2 * r22; |
254: |
255: temp_r[iat] = std::sqrt(dx[iat] * dx[iat] + dy[iat] * dy[iat] + dz[iat] * dz[iat]); |
256: } |
257: } |
0x289690 PUSH %RBP |
0x289691 MOV %RSP,%RBP |
0x289694 PUSH %R15 |
0x289696 PUSH %R14 |
0x289698 PUSH %R13 |
0x28969a PUSH %R12 |
0x28969c PUSH %RBX |
0x28969d MOV 0x10(%RBP),%EAX |
0x2896a0 MOV %EAX,%R15D |
0x2896a3 SUB %R9D,%R15D |
0x2896a6 JLE 289b5a |
0x2896ac VMOVSD (%RSI),%XMM0 |
0x2896b0 VMOVSD 0x8(%RSI),%XMM1 |
0x2896b5 VMOVSD 0x10(%RSI),%XMM2 |
0x2896ba MOV 0x8(%RDX),%RSI |
0x2896be MOV 0x18(%RDX),%RDX |
0x2896c2 MOV 0x8(%R8),%R11 |
0x2896c6 MOV 0x18(%R8),%R8 |
0x2896ca XOR %R14D,%R14D |
0x2896cd LEA (%RDX,%RSI,8),%R10 |
0x2896d1 SAL $0x4,%RSI |
0x2896d5 LEA (%R8,%R11,8),%RBX |
0x2896d9 SAL $0x4,%R11 |
0x2896dd ADD %RDX,%RSI |
0x2896e0 ADD %R8,%R11 |
0x2896e3 CMP $0x4,%R15D |
0x2896e7 JB 289a1e |
0x2896ed LEA -0x1(%RAX),%R12D |
0x2896f1 CMP %R9D,%R12D |
0x2896f4 JL 289a1e |
0x2896fa XOR %R14D,%R14D |
0x2896fd CMP $0x8,%R15D |
0x289701 JB 2898aa |
0x289707 VPBROADCASTQ -0x85409(%RIP),%ZMM6 |
0x289711 VPBROADCASTQ -0x8556b(%RIP),%ZMM7 |
0x28971b VBROADCASTSD %XMM0,%ZMM3 |
0x289721 VBROADCASTSD %XMM1,%ZMM4 |
0x289727 VBROADCASTSD %XMM2,%ZMM5 |
0x28972d MOV %R15D,%R14D |
0x289730 AND $-0x8,%R14D |
0x289734 MOV %R9D,%R13D |
0x289737 MOV %R14D,%R12D |
0x28973a NOPW (%RAX,%RAX,1) |
(1310) 0x289740 MOVSXD %R13D,%R13 |
(1310) 0x289743 VMOVDQA64 %ZMM7,%ZMM13 |
(1310) 0x289749 VMOVUPD (%RDX,%R13,8),%ZMM8 |
(1310) 0x289750 VMOVUPD (%R10,%R13,8),%ZMM9 |
(1310) 0x289757 VMOVUPD (%RSI,%R13,8),%ZMM10 |
(1310) 0x28975e VSUBPD %ZMM3,%ZMM8,%ZMM8 |
(1310) 0x289764 VSUBPD %ZMM4,%ZMM9,%ZMM9 |
(1310) 0x28976a VMULPD 0x48(%RDI){1to8},%ZMM8,%ZMM11 |
(1310) 0x289771 VMULPD 0x60(%RDI){1to8},%ZMM8,%ZMM12 |
(1310) 0x289778 VSUBPD %ZMM5,%ZMM10,%ZMM10 |
(1310) 0x28977e VMULPD 0x78(%RDI){1to8},%ZMM8,%ZMM8 |
(1310) 0x289785 VFMADD231PD 0x50(%RDI){1to8},%ZMM9,%ZMM11 |
(1310) 0x28978c VFMADD231PD 0x68(%RDI){1to8},%ZMM9,%ZMM12 |
(1310) 0x289793 VFMADD231PD 0x80(%RDI){1to8},%ZMM9,%ZMM8 |
(1310) 0x28979a VMOVDQA64 %ZMM7,%ZMM9 |
(1310) 0x2897a0 VFMADD231PD 0x58(%RDI){1to8},%ZMM10,%ZMM11 |
(1310) 0x2897a7 VFMADD231PD 0x70(%RDI){1to8},%ZMM10,%ZMM12 |
(1310) 0x2897ae VFMADD231PD 0x88(%RDI){1to8},%ZMM10,%ZMM8 |
(1310) 0x2897b5 VMOVDQA64 %ZMM7,%ZMM10 |
(1310) 0x2897bb VPTERNLOGQ $-0x8,%ZMM6,%ZMM11,%ZMM9 |
(1310) 0x2897c2 VPTERNLOGQ $-0x8,%ZMM6,%ZMM12,%ZMM10 |
(1310) 0x2897c9 VPTERNLOGQ $-0x8,%ZMM6,%ZMM8,%ZMM13 |
(1310) 0x2897d0 VADDPD %ZMM9,%ZMM11,%ZMM9 |
(1310) 0x2897d6 VADDPD %ZMM10,%ZMM12,%ZMM10 |
(1310) 0x2897dc VRNDSCALEPD $0xb,%ZMM9,%ZMM9 |
(1310) 0x2897e3 VRNDSCALEPD $0xb,%ZMM10,%ZMM10 |
(1310) 0x2897ea VSUBPD %ZMM9,%ZMM11,%ZMM9 |
(1310) 0x2897f0 VADDPD %ZMM13,%ZMM8,%ZMM11 |
(1310) 0x2897f6 VSUBPD %ZMM10,%ZMM12,%ZMM10 |
(1310) 0x2897fc VMULPD (%RDI){1to8},%ZMM9,%ZMM12 |
(1310) 0x289802 VRNDSCALEPD $0xb,%ZMM11,%ZMM11 |
(1310) 0x289809 VFMADD231PD 0x8(%RDI){1to8},%ZMM10,%ZMM12 |
(1310) 0x289810 VSUBPD %ZMM11,%ZMM8,%ZMM8 |
(1310) 0x289816 VFMADD231PD 0x10(%RDI){1to8},%ZMM8,%ZMM12 |
(1310) 0x28981d VMOVUPD %ZMM12,(%R8,%R13,8) |
(1310) 0x289824 VMULPD 0x18(%RDI){1to8},%ZMM9,%ZMM11 |
(1310) 0x28982b VFMADD231PD 0x20(%RDI){1to8},%ZMM10,%ZMM11 |
(1310) 0x289832 VFMADD231PD 0x28(%RDI){1to8},%ZMM8,%ZMM11 |
(1310) 0x289839 VMOVUPD %ZMM11,(%RBX,%R13,8) |
(1310) 0x289840 VMULPD 0x30(%RDI){1to8},%ZMM9,%ZMM9 |
(1310) 0x289847 VFMADD231PD 0x38(%RDI){1to8},%ZMM10,%ZMM9 |
(1310) 0x28984e VFMADD231PD 0x40(%RDI){1to8},%ZMM8,%ZMM9 |
(1310) 0x289855 VMOVUPD %ZMM9,(%R11,%R13,8) |
(1310) 0x28985c VMOVUPD (%R8,%R13,8),%ZMM8 |
(1310) 0x289863 VMOVUPD (%RBX,%R13,8),%ZMM10 |
(1310) 0x28986a VMULPD %ZMM8,%ZMM8,%ZMM8 |
(1310) 0x289870 VFMADD231PD %ZMM9,%ZMM9,%ZMM8 |
(1310) 0x289876 VFMADD231PD %ZMM10,%ZMM10,%ZMM8 |
(1310) 0x28987c VSQRTPD %ZMM8,%ZMM8 |
(1310) 0x289882 VMOVUPD %ZMM8,(%RCX,%R13,8) |
(1310) 0x289889 ADD $0x8,%R13D |
(1310) 0x28988d ADD $-0x8,%R12D |
(1310) 0x289891 JNE 289740 |
0x289897 CMP %R14D,%R15D |
0x28989a JE 289b5a |
0x2898a0 TEST $0x4,%R15B |
0x2898a4 JE 289a1e |
0x2898aa VPBROADCASTQ -0x855ab(%RIP),%YMM6 |
0x2898b3 VPBROADCASTQ -0x8570c(%RIP),%YMM7 |
0x2898bc MOV %R14D,%R12D |
0x2898bf MOV %R15D,%R14D |
0x2898c2 AND $-0x4,%R14D |
0x2898c6 VBROADCASTSD %XMM0,%YMM3 |
0x2898cb VBROADCASTSD %XMM1,%YMM4 |
0x2898d0 VBROADCASTSD %XMM2,%YMM5 |
0x2898d5 LEA (%R12,%R9,1),%R13D |
0x2898d9 SUB %R14D,%R12D |
0x2898dc NOPL (%RAX) |
(1309) 0x2898e0 MOVSXD %R13D,%R13 |
(1309) 0x2898e3 VMOVUPD (%RDX,%R13,8),%YMM8 |
(1309) 0x2898e9 VMOVUPD (%R10,%R13,8),%YMM9 |
(1309) 0x2898ef VMOVUPD (%RSI,%R13,8),%YMM10 |
(1309) 0x2898f5 VSUBPD %YMM3,%YMM8,%YMM8 |
(1309) 0x2898f9 VSUBPD %YMM4,%YMM9,%YMM9 |
(1309) 0x2898fd VMULPD 0x48(%RDI){1to4},%YMM8,%YMM11 |
(1309) 0x289904 VSUBPD %YMM5,%YMM10,%YMM10 |
(1309) 0x289908 VMULPD 0x60(%RDI){1to4},%YMM8,%YMM12 |
(1309) 0x28990f VMULPD 0x78(%RDI){1to4},%YMM8,%YMM8 |
(1309) 0x289916 VFMADD231PD 0x50(%RDI){1to4},%YMM9,%YMM11 |
(1309) 0x28991d VFMADD231PD 0x68(%RDI){1to4},%YMM9,%YMM12 |
(1309) 0x289924 VFMADD231PD 0x80(%RDI){1to4},%YMM9,%YMM8 |
(1309) 0x28992b VMOVDQA %YMM7,%YMM9 |
(1309) 0x28992f VFMADD231PD 0x58(%RDI){1to4},%YMM10,%YMM11 |
(1309) 0x289936 VFMADD231PD 0x70(%RDI){1to4},%YMM10,%YMM12 |
(1309) 0x28993d VFMADD231PD 0x88(%RDI){1to4},%YMM10,%YMM8 |
(1309) 0x289944 VMOVDQA %YMM7,%YMM10 |
(1309) 0x289948 VPTERNLOGQ $-0x8,%YMM6,%YMM11,%YMM9 |
(1309) 0x28994f VPTERNLOGQ $-0x8,%YMM6,%YMM12,%YMM10 |
(1309) 0x289956 VADDPD %YMM9,%YMM11,%YMM9 |
(1309) 0x28995b VADDPD %YMM10,%YMM12,%YMM10 |
(1309) 0x289960 VROUNDPD $0xb,%YMM9,%YMM9 |
(1309) 0x289966 VROUNDPD $0xb,%YMM10,%YMM10 |
(1309) 0x28996c VSUBPD %YMM9,%YMM11,%YMM9 |
(1309) 0x289971 VMOVDQA %YMM7,%YMM11 |
(1309) 0x289975 VPTERNLOGQ $-0x8,%YMM6,%YMM8,%YMM11 |
(1309) 0x28997c VSUBPD %YMM10,%YMM12,%YMM10 |
(1309) 0x289981 VADDPD %YMM11,%YMM8,%YMM11 |
(1309) 0x289986 VROUNDPD $0xb,%YMM11,%YMM11 |
(1309) 0x28998c VSUBPD %YMM11,%YMM8,%YMM8 |
(1309) 0x289991 VMULPD (%RDI){1to4},%YMM9,%YMM11 |
(1309) 0x289997 VFMADD231PD 0x8(%RDI){1to4},%YMM10,%YMM11 |
(1309) 0x28999e VFMADD231PD 0x10(%RDI){1to4},%YMM8,%YMM11 |
(1309) 0x2899a5 VMOVUPD %YMM11,(%R8,%R13,8) |
(1309) 0x2899ab VMULPD 0x18(%RDI){1to4},%YMM9,%YMM11 |
(1309) 0x2899b2 VFMADD231PD 0x20(%RDI){1to4},%YMM10,%YMM11 |
(1309) 0x2899b9 VFMADD231PD 0x28(%RDI){1to4},%YMM8,%YMM11 |
(1309) 0x2899c0 VMOVUPD %YMM11,(%RBX,%R13,8) |
(1309) 0x2899c6 VMULPD 0x30(%RDI){1to4},%YMM9,%YMM9 |
(1309) 0x2899cd VFMADD231PD 0x38(%RDI){1to4},%YMM10,%YMM9 |
(1309) 0x2899d4 VFMADD231PD 0x40(%RDI){1to4},%YMM8,%YMM9 |
(1309) 0x2899db VMOVUPD %YMM9,(%R11,%R13,8) |
(1309) 0x2899e1 VMOVUPD (%R8,%R13,8),%YMM8 |
(1309) 0x2899e7 VMOVUPD (%RBX,%R13,8),%YMM10 |
(1309) 0x2899ed VMULPD %YMM8,%YMM8,%YMM8 |
(1309) 0x2899f2 VFMADD231PD %YMM9,%YMM9,%YMM8 |
(1309) 0x2899f7 VFMADD231PD %YMM10,%YMM10,%YMM8 |
(1309) 0x2899fc VSQRTPD %YMM8,%YMM8 |
(1309) 0x289a01 VMOVUPD %YMM8,(%RCX,%R13,8) |
(1309) 0x289a07 ADD $0x4,%R13D |
(1309) 0x289a0b ADD $0x4,%R12D |
(1309) 0x289a0f JNE 2898e0 |
0x289a15 CMP %R14D,%R15D |
0x289a18 JE 289b5a |
0x289a1e VPBROADCASTQ -0x8571f(%RIP),%XMM3 |
0x289a27 VPBROADCASTQ -0x85880(%RIP),%XMM4 |
0x289a30 ADD %R9D,%R14D |
0x289a33 NOPW %CS:(%RAX,%RAX,1) |
(1308) 0x289a40 MOVSXD %R14D,%R14 |
(1308) 0x289a43 VMOVSD (%RDX,%R14,8),%XMM5 |
(1308) 0x289a49 VMOVSD (%R10,%R14,8),%XMM6 |
(1308) 0x289a4f VMOVSD (%RSI,%R14,8),%XMM7 |
(1308) 0x289a55 VSUBSD %XMM0,%XMM5,%XMM5 |
(1308) 0x289a59 VSUBSD %XMM1,%XMM6,%XMM6 |
(1308) 0x289a5d VMULSD 0x48(%RDI),%XMM5,%XMM8 |
(1308) 0x289a62 VSUBSD %XMM2,%XMM7,%XMM7 |
(1308) 0x289a66 VMULSD 0x60(%RDI),%XMM5,%XMM9 |
(1308) 0x289a6b VMULSD 0x78(%RDI),%XMM5,%XMM5 |
(1308) 0x289a70 VFMADD231SD 0x50(%RDI),%XMM6,%XMM8 |
(1308) 0x289a76 VFMADD231SD 0x68(%RDI),%XMM6,%XMM9 |
(1308) 0x289a7c VFMADD231SD 0x80(%RDI),%XMM6,%XMM5 |
(1308) 0x289a85 VMOVDQA %XMM4,%XMM6 |
(1308) 0x289a89 VFMADD231SD 0x58(%RDI),%XMM7,%XMM8 |
(1308) 0x289a8f VFMADD231SD 0x70(%RDI),%XMM7,%XMM9 |
(1308) 0x289a95 VFMADD231SD 0x88(%RDI),%XMM7,%XMM5 |
(1308) 0x289a9e VMOVDQA %XMM4,%XMM7 |
(1308) 0x289aa2 VPTERNLOGQ $-0x8,%XMM3,%XMM8,%XMM6 |
(1308) 0x289aa9 VPTERNLOGQ $-0x8,%XMM3,%XMM9,%XMM7 |
(1308) 0x289ab0 VADDSD %XMM6,%XMM8,%XMM6 |
(1308) 0x289ab4 VADDSD %XMM7,%XMM9,%XMM7 |
(1308) 0x289ab8 VROUNDSD $0xb,%XMM6,%XMM6,%XMM6 |
(1308) 0x289abe VROUNDSD $0xb,%XMM7,%XMM7,%XMM7 |
(1308) 0x289ac4 VSUBSD %XMM6,%XMM8,%XMM6 |
(1308) 0x289ac8 VMOVDQA %XMM4,%XMM8 |
(1308) 0x289acc VPTERNLOGQ $-0x8,%XMM3,%XMM5,%XMM8 |
(1308) 0x289ad3 VSUBSD %XMM7,%XMM9,%XMM7 |
(1308) 0x289ad7 VADDSD %XMM5,%XMM8,%XMM8 |
(1308) 0x289adb VROUNDSD $0xb,%XMM8,%XMM8,%XMM8 |
(1308) 0x289ae1 VSUBSD %XMM8,%XMM5,%XMM5 |
(1308) 0x289ae6 VMULSD (%RDI),%XMM6,%XMM8 |
(1308) 0x289aea VFMADD231SD 0x8(%RDI),%XMM7,%XMM8 |
(1308) 0x289af0 VFMADD231SD 0x10(%RDI),%XMM5,%XMM8 |
(1308) 0x289af6 VMOVSD %XMM8,(%R8,%R14,8) |
(1308) 0x289afc VMULSD 0x18(%RDI),%XMM6,%XMM8 |
(1308) 0x289b01 VFMADD231SD 0x20(%RDI),%XMM7,%XMM8 |
(1308) 0x289b07 VFMADD231SD 0x28(%RDI),%XMM5,%XMM8 |
(1308) 0x289b0d VMOVSD %XMM8,(%RBX,%R14,8) |
(1308) 0x289b13 VMULSD 0x30(%RDI),%XMM6,%XMM6 |
(1308) 0x289b18 VFMADD231SD 0x38(%RDI),%XMM7,%XMM6 |
(1308) 0x289b1e VFMADD231SD 0x40(%RDI),%XMM5,%XMM6 |
(1308) 0x289b24 VMOVSD %XMM6,(%R11,%R14,8) |
(1308) 0x289b2a VMOVSD (%R8,%R14,8),%XMM5 |
(1308) 0x289b30 VMOVSD (%RBX,%R14,8),%XMM7 |
(1308) 0x289b36 VMULSD %XMM5,%XMM5,%XMM5 |
(1308) 0x289b3a VFMADD231SD %XMM6,%XMM6,%XMM5 |
(1308) 0x289b3f VFMADD231SD %XMM7,%XMM7,%XMM5 |
(1308) 0x289b44 VSQRTSD %XMM5,%XMM5,%XMM5 |
(1308) 0x289b48 VMOVSD %XMM5,(%RCX,%R14,8) |
(1308) 0x289b4e INC %R14D |
(1308) 0x289b51 CMP %R14D,%EAX |
(1308) 0x289b54 JNE 289a40 |
0x289b5a POP %RBX |
0x289b5b POP %R12 |
0x289b5d POP %R13 |
0x289b5f POP %R14 |
0x289b61 POP %R15 |
0x289b63 POP %RBP |
0x289b64 VZEROUPPER |
0x289b67 RET |
0x289b68 INT $0x3 |
0x289b69 INT $0x3 |
0x289b6a INT $0x3 |
0x289b6b INT $0x3 |
0x289b6c INT $0x3 |
0x289b6d INT $0x3 |
0x289b6e INT $0x3 |
0x289b6f INT $0x3 |
Path / |
Source file and lines | ParticleBConds3DSoa.h:221-257 |
Module | exec |
nb instructions | 80 |
nb uops | 69 |
loop length | 314 |
used x86 registers | 14 |
used mmx registers | 0 |
used xmm registers | 5 |
used ymm registers | 5 |
used zmm registers | 5 |
nb stack references | 1 |
micro-operation queue | 11.50 cycles |
front end | 11.50 cycles |
ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 5.75 | 5.75 | 5.75 | 5.75 | 4.00 | 4.67 | 4.67 | 4.67 | 0.00 | 3.00 | 3.00 | 0.00 | 0.00 | 0.00 |
cycles | 5.75 | 5.75 | 5.75 | 5.75 | 4.00 | 4.67 | 4.67 | 4.67 | 0.00 | 3.00 | 3.00 | 0.00 | 0.00 | 0.00 |
Cycles executing div or sqrt instructions | NA |
Front-end | 11.50 |
Dispatch | 5.75 |
Overall L1 | 11.50 |
all | 6% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 7% |
all | 0% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
all | 4% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 5% |
all | 10% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 10% |
all | 12% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 12% |
all | 10% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 10% |
Instruction | Nb FU | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV 0x10(%RBP),%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %EAX,%R15D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SUB %R9D,%R15D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JLE 289b5a <_ZNK11qmcplusplus10DTD_BCondsIdLj3ELi40EE16computeDistancesINS_10TinyVectorIdLj3EEENS_18VectorSoAContainerIdLj3ENS_10MallocatorIdLm64EEEEES8_EEvRKT_RKT0_PdRT1_iii+0x4ca> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
VMOVSD (%RSI),%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVSD 0x8(%RSI),%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVSD 0x10(%RSI),%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV 0x8(%RDX),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x18(%RDX),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x8(%R8),%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x18(%R8),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
XOR %R14D,%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
LEA (%RDX,%RSI,8),%R10 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SAL $0x4,%RSI | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
LEA (%R8,%R11,8),%RBX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SAL $0x4,%R11 | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
ADD %RDX,%RSI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD %R8,%R11 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CMP $0x4,%R15D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JB 289a1e <_ZNK11qmcplusplus10DTD_BCondsIdLj3ELi40EE16computeDistancesINS_10TinyVectorIdLj3EEENS_18VectorSoAContainerIdLj3ENS_10MallocatorIdLm64EEEEES8_EEvRKT_RKT0_PdRT1_iii+0x38e> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
LEA -0x1(%RAX),%R12D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CMP %R9D,%R12D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JL 289a1e <_ZNK11qmcplusplus10DTD_BCondsIdLj3ELi40EE16computeDistancesINS_10TinyVectorIdLj3EEENS_18VectorSoAContainerIdLj3ENS_10MallocatorIdLm64EEEEES8_EEvRKT_RKT0_PdRT1_iii+0x38e> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
XOR %R14D,%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
CMP $0x8,%R15D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JB 2898aa <_ZNK11qmcplusplus10DTD_BCondsIdLj3ELi40EE16computeDistancesINS_10TinyVectorIdLj3EEENS_18VectorSoAContainerIdLj3ENS_10MallocatorIdLm64EEEEES8_EEvRKT_RKT0_PdRT1_iii+0x21a> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
VPBROADCASTQ -0x85409(%RIP),%ZMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | 0.50 |
VPBROADCASTQ -0x8556b(%RIP),%ZMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | 0.50 |
VBROADCASTSD %XMM0,%ZMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 1 |
VBROADCASTSD %XMM1,%ZMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 1 |
VBROADCASTSD %XMM2,%ZMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 1 |
MOV %R15D,%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
AND $-0x8,%R14D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %R9D,%R13D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R14D,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW (%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
CMP %R14D,%R15D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JE 289b5a <_ZNK11qmcplusplus10DTD_BCondsIdLj3ELi40EE16computeDistancesINS_10TinyVectorIdLj3EEENS_18VectorSoAContainerIdLj3ENS_10MallocatorIdLm64EEEEES8_EEvRKT_RKT0_PdRT1_iii+0x4ca> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
TEST $0x4,%R15B | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JE 289a1e <_ZNK11qmcplusplus10DTD_BCondsIdLj3ELi40EE16computeDistancesINS_10TinyVectorIdLj3EEENS_18VectorSoAContainerIdLj3ENS_10MallocatorIdLm64EEEEES8_EEvRKT_RKT0_PdRT1_iii+0x38e> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
VPBROADCASTQ -0x855ab(%RIP),%YMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VPBROADCASTQ -0x8570c(%RIP),%YMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %R14D,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R15D,%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
AND $-0x4,%R14D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
VBROADCASTSD %XMM0,%YMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 |
VBROADCASTSD %XMM1,%YMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 |
VBROADCASTSD %XMM2,%YMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 |
LEA (%R12,%R9,1),%R13D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SUB %R14D,%R12D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
NOPL (%RAX) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
CMP %R14D,%R15D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JE 289b5a <_ZNK11qmcplusplus10DTD_BCondsIdLj3ELi40EE16computeDistancesINS_10TinyVectorIdLj3EEENS_18VectorSoAContainerIdLj3ENS_10MallocatorIdLm64EEEEES8_EEvRKT_RKT0_PdRT1_iii+0x4ca> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
VPBROADCASTQ -0x8571f(%RIP),%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VPBROADCASTQ -0x85880(%RIP),%XMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
ADD %R9D,%R14D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
POP %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VZEROUPPER | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
RET | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 |
Source file and lines | ParticleBConds3DSoa.h:221-257 |
Module | exec |
nb instructions | 80 |
nb uops | 69 |
loop length | 314 |
used x86 registers | 14 |
used mmx registers | 0 |
used xmm registers | 5 |
used ymm registers | 5 |
used zmm registers | 5 |
nb stack references | 1 |
micro-operation queue | 11.50 cycles |
front end | 11.50 cycles |
ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 5.75 | 5.75 | 5.75 | 5.75 | 4.00 | 4.67 | 4.67 | 4.67 | 0.00 | 3.00 | 3.00 | 0.00 | 0.00 | 0.00 |
cycles | 5.75 | 5.75 | 5.75 | 5.75 | 4.00 | 4.67 | 4.67 | 4.67 | 0.00 | 3.00 | 3.00 | 0.00 | 0.00 | 0.00 |
Cycles executing div or sqrt instructions | NA |
Front-end | 11.50 |
Dispatch | 5.75 |
Overall L1 | 11.50 |
all | 6% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 7% |
all | 0% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
all | 4% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 5% |
all | 10% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 10% |
all | 12% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 12% |
all | 10% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 10% |
Instruction | Nb FU | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV 0x10(%RBP),%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %EAX,%R15D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SUB %R9D,%R15D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JLE 289b5a <_ZNK11qmcplusplus10DTD_BCondsIdLj3ELi40EE16computeDistancesINS_10TinyVectorIdLj3EEENS_18VectorSoAContainerIdLj3ENS_10MallocatorIdLm64EEEEES8_EEvRKT_RKT0_PdRT1_iii+0x4ca> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
VMOVSD (%RSI),%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVSD 0x8(%RSI),%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVSD 0x10(%RSI),%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV 0x8(%RDX),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x18(%RDX),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x8(%R8),%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x18(%R8),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
XOR %R14D,%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
LEA (%RDX,%RSI,8),%R10 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SAL $0x4,%RSI | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
LEA (%R8,%R11,8),%RBX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SAL $0x4,%R11 | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
ADD %RDX,%RSI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD %R8,%R11 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CMP $0x4,%R15D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JB 289a1e <_ZNK11qmcplusplus10DTD_BCondsIdLj3ELi40EE16computeDistancesINS_10TinyVectorIdLj3EEENS_18VectorSoAContainerIdLj3ENS_10MallocatorIdLm64EEEEES8_EEvRKT_RKT0_PdRT1_iii+0x38e> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
LEA -0x1(%RAX),%R12D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CMP %R9D,%R12D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JL 289a1e <_ZNK11qmcplusplus10DTD_BCondsIdLj3ELi40EE16computeDistancesINS_10TinyVectorIdLj3EEENS_18VectorSoAContainerIdLj3ENS_10MallocatorIdLm64EEEEES8_EEvRKT_RKT0_PdRT1_iii+0x38e> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
XOR %R14D,%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
CMP $0x8,%R15D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JB 2898aa <_ZNK11qmcplusplus10DTD_BCondsIdLj3ELi40EE16computeDistancesINS_10TinyVectorIdLj3EEENS_18VectorSoAContainerIdLj3ENS_10MallocatorIdLm64EEEEES8_EEvRKT_RKT0_PdRT1_iii+0x21a> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
VPBROADCASTQ -0x85409(%RIP),%ZMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | 0.50 |
VPBROADCASTQ -0x8556b(%RIP),%ZMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | 0.50 |
VBROADCASTSD %XMM0,%ZMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 1 |
VBROADCASTSD %XMM1,%ZMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 1 |
VBROADCASTSD %XMM2,%ZMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 1 |
MOV %R15D,%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
AND $-0x8,%R14D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %R9D,%R13D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R14D,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW (%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
CMP %R14D,%R15D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JE 289b5a <_ZNK11qmcplusplus10DTD_BCondsIdLj3ELi40EE16computeDistancesINS_10TinyVectorIdLj3EEENS_18VectorSoAContainerIdLj3ENS_10MallocatorIdLm64EEEEES8_EEvRKT_RKT0_PdRT1_iii+0x4ca> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
TEST $0x4,%R15B | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JE 289a1e <_ZNK11qmcplusplus10DTD_BCondsIdLj3ELi40EE16computeDistancesINS_10TinyVectorIdLj3EEENS_18VectorSoAContainerIdLj3ENS_10MallocatorIdLm64EEEEES8_EEvRKT_RKT0_PdRT1_iii+0x38e> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
VPBROADCASTQ -0x855ab(%RIP),%YMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VPBROADCASTQ -0x8570c(%RIP),%YMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %R14D,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R15D,%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
AND $-0x4,%R14D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
VBROADCASTSD %XMM0,%YMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 |
VBROADCASTSD %XMM1,%YMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 |
VBROADCASTSD %XMM2,%YMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 |
LEA (%R12,%R9,1),%R13D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SUB %R14D,%R12D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
NOPL (%RAX) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
CMP %R14D,%R15D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JE 289b5a <_ZNK11qmcplusplus10DTD_BCondsIdLj3ELi40EE16computeDistancesINS_10TinyVectorIdLj3EEENS_18VectorSoAContainerIdLj3ENS_10MallocatorIdLm64EEEEES8_EEvRKT_RKT0_PdRT1_iii+0x4ca> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
VPBROADCASTQ -0x8571f(%RIP),%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VPBROADCASTQ -0x85880(%RIP),%XMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
ADD %R9D,%R14D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
POP %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VZEROUPPER | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
RET | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼void qmcplusplus::DTD_BConds | 0.59 | 1.11 |
○Loop 1310 - ParticleBConds3DSoa.h:234-255 - exec | 0.59 | 1.08 |
○Loop 1309 - ParticleBConds3DSoa.h:234-255 - exec | 0 | 0 |
○Loop 1308 - ParticleBConds3DSoa.h:234-255 - exec | 0 | 0.01 |