Function: _ZNK11qmcplusplus10DTD_BCondsIdLj3ELi40EE16computeDistancesINS_10TinyVectorIdLj3EEENS_18Ve ... | Module: exec | Source: ParticleBConds3DSoa.h:221-257 [...] | Coverage: 0.67% |
---|
Function: _ZNK11qmcplusplus10DTD_BCondsIdLj3ELi40EE16computeDistancesINS_10TinyVectorIdLj3EEENS_18Ve ... | Module: exec | Source: ParticleBConds3DSoa.h:221-257 [...] | Coverage: 0.67% |
---|
/beegfs/hackathon/users/eoseret/qaas_runs/170-855-3059/intel/miniqmc/build/miniqmc/src/Particle/Lattice/ParticleBConds3DSoa.h: 221 - 257 |
-------------------------------------------------------------------------------- |
221: { |
[...] |
234: #pragma omp simd aligned(temp_r, px, py, pz, dx, dy, dz: QMC_SIMD_ALIGNMENT) |
235: for (int iat = first; iat < last; ++iat) |
236: { |
237: T displ_0 = px[iat] - x0; |
238: T displ_1 = py[iat] - y0; |
239: T displ_2 = pz[iat] - z0; |
240: |
241: T ar_0 = displ_0 * g00 + displ_1 * g10 + displ_2 * g20; |
242: T ar_1 = displ_0 * g01 + displ_1 * g11 + displ_2 * g21; |
243: T ar_2 = displ_0 * g02 + displ_1 * g12 + displ_2 * g22; |
244: |
245: //put them in the box |
246: ar_0 -= round(ar_0); |
247: ar_1 -= round(ar_1); |
248: ar_2 -= round(ar_2); |
249: |
250: //unit2cart |
251: dx[iat] = ar_0 * r00 + ar_1 * r10 + ar_2 * r20; |
252: dy[iat] = ar_0 * r01 + ar_1 * r11 + ar_2 * r21; |
253: dz[iat] = ar_0 * r02 + ar_1 * r12 + ar_2 * r22; |
254: |
255: temp_r[iat] = std::sqrt(dx[iat] * dx[iat] + dy[iat] * dy[iat] + dz[iat] * dz[iat]); |
256: } |
257: } |
0x289950 PUSH %RBP |
0x289951 MOV %RSP,%RBP |
0x289954 PUSH %R15 |
0x289956 PUSH %R14 |
0x289958 PUSH %R13 |
0x28995a PUSH %R12 |
0x28995c PUSH %RBX |
0x28995d MOV 0x10(%RBP),%EAX |
0x289960 MOV %EAX,%R15D |
0x289963 SUB %R9D,%R15D |
0x289966 JLE 289e1a |
0x28996c VMOVSD (%RSI),%XMM0 |
0x289970 VMOVSD 0x8(%RSI),%XMM1 |
0x289975 VMOVSD 0x10(%RSI),%XMM2 |
0x28997a MOV 0x8(%RDX),%RSI |
0x28997e MOV 0x18(%RDX),%RDX |
0x289982 MOV 0x8(%R8),%R11 |
0x289986 MOV 0x18(%R8),%R8 |
0x28998a XOR %R14D,%R14D |
0x28998d LEA (%RDX,%RSI,8),%R10 |
0x289991 SAL $0x4,%RSI |
0x289995 LEA (%R8,%R11,8),%RBX |
0x289999 SAL $0x4,%R11 |
0x28999d ADD %RDX,%RSI |
0x2899a0 ADD %R8,%R11 |
0x2899a3 CMP $0x4,%R15D |
0x2899a7 JB 289cde |
0x2899ad LEA -0x1(%RAX),%R12D |
0x2899b1 CMP %R9D,%R12D |
0x2899b4 JL 289cde |
0x2899ba XOR %R14D,%R14D |
0x2899bd CMP $0x8,%R15D |
0x2899c1 JB 289b6a |
0x2899c7 VPBROADCASTQ -0x85741(%RIP),%ZMM6 |
0x2899d1 VPBROADCASTQ -0x858a3(%RIP),%ZMM7 |
0x2899db VBROADCASTSD %XMM0,%ZMM3 |
0x2899e1 VBROADCASTSD %XMM1,%ZMM4 |
0x2899e7 VBROADCASTSD %XMM2,%ZMM5 |
0x2899ed MOV %R15D,%R14D |
0x2899f0 AND $-0x8,%R14D |
0x2899f4 MOV %R9D,%R13D |
0x2899f7 MOV %R14D,%R12D |
0x2899fa NOPW (%RAX,%RAX,1) |
(1310) 0x289a00 MOVSXD %R13D,%R13 |
(1310) 0x289a03 VMOVDQA64 %ZMM7,%ZMM13 |
(1310) 0x289a09 VMOVUPD (%RDX,%R13,8),%ZMM8 |
(1310) 0x289a10 VMOVUPD (%R10,%R13,8),%ZMM9 |
(1310) 0x289a17 VMOVUPD (%RSI,%R13,8),%ZMM10 |
(1310) 0x289a1e VSUBPD %ZMM3,%ZMM8,%ZMM8 |
(1310) 0x289a24 VSUBPD %ZMM4,%ZMM9,%ZMM9 |
(1310) 0x289a2a VMULPD 0x48(%RDI){1to8},%ZMM8,%ZMM11 |
(1310) 0x289a31 VMULPD 0x60(%RDI){1to8},%ZMM8,%ZMM12 |
(1310) 0x289a38 VSUBPD %ZMM5,%ZMM10,%ZMM10 |
(1310) 0x289a3e VMULPD 0x78(%RDI){1to8},%ZMM8,%ZMM8 |
(1310) 0x289a45 VFMADD231PD 0x50(%RDI){1to8},%ZMM9,%ZMM11 |
(1310) 0x289a4c VFMADD231PD 0x68(%RDI){1to8},%ZMM9,%ZMM12 |
(1310) 0x289a53 VFMADD231PD 0x80(%RDI){1to8},%ZMM9,%ZMM8 |
(1310) 0x289a5a VMOVDQA64 %ZMM7,%ZMM9 |
(1310) 0x289a60 VFMADD231PD 0x58(%RDI){1to8},%ZMM10,%ZMM11 |
(1310) 0x289a67 VFMADD231PD 0x70(%RDI){1to8},%ZMM10,%ZMM12 |
(1310) 0x289a6e VFMADD231PD 0x88(%RDI){1to8},%ZMM10,%ZMM8 |
(1310) 0x289a75 VMOVDQA64 %ZMM7,%ZMM10 |
(1310) 0x289a7b VPTERNLOGQ $-0x8,%ZMM6,%ZMM11,%ZMM9 |
(1310) 0x289a82 VPTERNLOGQ $-0x8,%ZMM6,%ZMM12,%ZMM10 |
(1310) 0x289a89 VPTERNLOGQ $-0x8,%ZMM6,%ZMM8,%ZMM13 |
(1310) 0x289a90 VADDPD %ZMM9,%ZMM11,%ZMM9 |
(1310) 0x289a96 VADDPD %ZMM10,%ZMM12,%ZMM10 |
(1310) 0x289a9c VRNDSCALEPD $0xb,%ZMM9,%ZMM9 |
(1310) 0x289aa3 VRNDSCALEPD $0xb,%ZMM10,%ZMM10 |
(1310) 0x289aaa VSUBPD %ZMM9,%ZMM11,%ZMM9 |
(1310) 0x289ab0 VADDPD %ZMM13,%ZMM8,%ZMM11 |
(1310) 0x289ab6 VSUBPD %ZMM10,%ZMM12,%ZMM10 |
(1310) 0x289abc VMULPD (%RDI){1to8},%ZMM9,%ZMM12 |
(1310) 0x289ac2 VRNDSCALEPD $0xb,%ZMM11,%ZMM11 |
(1310) 0x289ac9 VFMADD231PD 0x8(%RDI){1to8},%ZMM10,%ZMM12 |
(1310) 0x289ad0 VSUBPD %ZMM11,%ZMM8,%ZMM8 |
(1310) 0x289ad6 VFMADD231PD 0x10(%RDI){1to8},%ZMM8,%ZMM12 |
(1310) 0x289add VMOVUPD %ZMM12,(%R8,%R13,8) |
(1310) 0x289ae4 VMULPD 0x18(%RDI){1to8},%ZMM9,%ZMM11 |
(1310) 0x289aeb VFMADD231PD 0x20(%RDI){1to8},%ZMM10,%ZMM11 |
(1310) 0x289af2 VFMADD231PD 0x28(%RDI){1to8},%ZMM8,%ZMM11 |
(1310) 0x289af9 VMOVUPD %ZMM11,(%RBX,%R13,8) |
(1310) 0x289b00 VMULPD 0x30(%RDI){1to8},%ZMM9,%ZMM9 |
(1310) 0x289b07 VFMADD231PD 0x38(%RDI){1to8},%ZMM10,%ZMM9 |
(1310) 0x289b0e VFMADD231PD 0x40(%RDI){1to8},%ZMM8,%ZMM9 |
(1310) 0x289b15 VMOVUPD %ZMM9,(%R11,%R13,8) |
(1310) 0x289b1c VMOVUPD (%R8,%R13,8),%ZMM8 |
(1310) 0x289b23 VMOVUPD (%RBX,%R13,8),%ZMM10 |
(1310) 0x289b2a VMULPD %ZMM8,%ZMM8,%ZMM8 |
(1310) 0x289b30 VFMADD231PD %ZMM9,%ZMM9,%ZMM8 |
(1310) 0x289b36 VFMADD231PD %ZMM10,%ZMM10,%ZMM8 |
(1310) 0x289b3c VSQRTPD %ZMM8,%ZMM8 |
(1310) 0x289b42 VMOVUPD %ZMM8,(%RCX,%R13,8) |
(1310) 0x289b49 ADD $0x8,%R13D |
(1310) 0x289b4d ADD $-0x8,%R12D |
(1310) 0x289b51 JNE 289a00 |
0x289b57 CMP %R14D,%R15D |
0x289b5a JE 289e1a |
0x289b60 TEST $0x4,%R15B |
0x289b64 JE 289cde |
0x289b6a VPBROADCASTQ -0x858e3(%RIP),%YMM6 |
0x289b73 VPBROADCASTQ -0x85a44(%RIP),%YMM7 |
0x289b7c MOV %R14D,%R12D |
0x289b7f MOV %R15D,%R14D |
0x289b82 AND $-0x4,%R14D |
0x289b86 VBROADCASTSD %XMM0,%YMM3 |
0x289b8b VBROADCASTSD %XMM1,%YMM4 |
0x289b90 VBROADCASTSD %XMM2,%YMM5 |
0x289b95 LEA (%R12,%R9,1),%R13D |
0x289b99 SUB %R14D,%R12D |
0x289b9c NOPL (%RAX) |
(1309) 0x289ba0 MOVSXD %R13D,%R13 |
(1309) 0x289ba3 VMOVUPD (%RDX,%R13,8),%YMM8 |
(1309) 0x289ba9 VMOVUPD (%R10,%R13,8),%YMM9 |
(1309) 0x289baf VMOVUPD (%RSI,%R13,8),%YMM10 |
(1309) 0x289bb5 VSUBPD %YMM3,%YMM8,%YMM8 |
(1309) 0x289bb9 VSUBPD %YMM4,%YMM9,%YMM9 |
(1309) 0x289bbd VMULPD 0x48(%RDI){1to4},%YMM8,%YMM11 |
(1309) 0x289bc4 VSUBPD %YMM5,%YMM10,%YMM10 |
(1309) 0x289bc8 VMULPD 0x60(%RDI){1to4},%YMM8,%YMM12 |
(1309) 0x289bcf VMULPD 0x78(%RDI){1to4},%YMM8,%YMM8 |
(1309) 0x289bd6 VFMADD231PD 0x50(%RDI){1to4},%YMM9,%YMM11 |
(1309) 0x289bdd VFMADD231PD 0x68(%RDI){1to4},%YMM9,%YMM12 |
(1309) 0x289be4 VFMADD231PD 0x80(%RDI){1to4},%YMM9,%YMM8 |
(1309) 0x289beb VMOVDQA %YMM7,%YMM9 |
(1309) 0x289bef VFMADD231PD 0x58(%RDI){1to4},%YMM10,%YMM11 |
(1309) 0x289bf6 VFMADD231PD 0x70(%RDI){1to4},%YMM10,%YMM12 |
(1309) 0x289bfd VFMADD231PD 0x88(%RDI){1to4},%YMM10,%YMM8 |
(1309) 0x289c04 VMOVDQA %YMM7,%YMM10 |
(1309) 0x289c08 VPTERNLOGQ $-0x8,%YMM6,%YMM11,%YMM9 |
(1309) 0x289c0f VPTERNLOGQ $-0x8,%YMM6,%YMM12,%YMM10 |
(1309) 0x289c16 VADDPD %YMM9,%YMM11,%YMM9 |
(1309) 0x289c1b VADDPD %YMM10,%YMM12,%YMM10 |
(1309) 0x289c20 VROUNDPD $0xb,%YMM9,%YMM9 |
(1309) 0x289c26 VROUNDPD $0xb,%YMM10,%YMM10 |
(1309) 0x289c2c VSUBPD %YMM9,%YMM11,%YMM9 |
(1309) 0x289c31 VMOVDQA %YMM7,%YMM11 |
(1309) 0x289c35 VPTERNLOGQ $-0x8,%YMM6,%YMM8,%YMM11 |
(1309) 0x289c3c VSUBPD %YMM10,%YMM12,%YMM10 |
(1309) 0x289c41 VADDPD %YMM11,%YMM8,%YMM11 |
(1309) 0x289c46 VROUNDPD $0xb,%YMM11,%YMM11 |
(1309) 0x289c4c VSUBPD %YMM11,%YMM8,%YMM8 |
(1309) 0x289c51 VMULPD (%RDI){1to4},%YMM9,%YMM11 |
(1309) 0x289c57 VFMADD231PD 0x8(%RDI){1to4},%YMM10,%YMM11 |
(1309) 0x289c5e VFMADD231PD 0x10(%RDI){1to4},%YMM8,%YMM11 |
(1309) 0x289c65 VMOVUPD %YMM11,(%R8,%R13,8) |
(1309) 0x289c6b VMULPD 0x18(%RDI){1to4},%YMM9,%YMM11 |
(1309) 0x289c72 VFMADD231PD 0x20(%RDI){1to4},%YMM10,%YMM11 |
(1309) 0x289c79 VFMADD231PD 0x28(%RDI){1to4},%YMM8,%YMM11 |
(1309) 0x289c80 VMOVUPD %YMM11,(%RBX,%R13,8) |
(1309) 0x289c86 VMULPD 0x30(%RDI){1to4},%YMM9,%YMM9 |
(1309) 0x289c8d VFMADD231PD 0x38(%RDI){1to4},%YMM10,%YMM9 |
(1309) 0x289c94 VFMADD231PD 0x40(%RDI){1to4},%YMM8,%YMM9 |
(1309) 0x289c9b VMOVUPD %YMM9,(%R11,%R13,8) |
(1309) 0x289ca1 VMOVUPD (%R8,%R13,8),%YMM8 |
(1309) 0x289ca7 VMOVUPD (%RBX,%R13,8),%YMM10 |
(1309) 0x289cad VMULPD %YMM8,%YMM8,%YMM8 |
(1309) 0x289cb2 VFMADD231PD %YMM9,%YMM9,%YMM8 |
(1309) 0x289cb7 VFMADD231PD %YMM10,%YMM10,%YMM8 |
(1309) 0x289cbc VSQRTPD %YMM8,%YMM8 |
(1309) 0x289cc1 VMOVUPD %YMM8,(%RCX,%R13,8) |
(1309) 0x289cc7 ADD $0x4,%R13D |
(1309) 0x289ccb ADD $0x4,%R12D |
(1309) 0x289ccf JNE 289ba0 |
0x289cd5 CMP %R14D,%R15D |
0x289cd8 JE 289e1a |
0x289cde VPBROADCASTQ -0x85a57(%RIP),%XMM3 |
0x289ce7 VPBROADCASTQ -0x85bb8(%RIP),%XMM4 |
0x289cf0 ADD %R9D,%R14D |
0x289cf3 NOPW %CS:(%RAX,%RAX,1) |
(1308) 0x289d00 MOVSXD %R14D,%R14 |
(1308) 0x289d03 VMOVSD (%RDX,%R14,8),%XMM5 |
(1308) 0x289d09 VMOVSD (%R10,%R14,8),%XMM6 |
(1308) 0x289d0f VMOVSD (%RSI,%R14,8),%XMM7 |
(1308) 0x289d15 VSUBSD %XMM0,%XMM5,%XMM5 |
(1308) 0x289d19 VSUBSD %XMM1,%XMM6,%XMM6 |
(1308) 0x289d1d VMULSD 0x48(%RDI),%XMM5,%XMM8 |
(1308) 0x289d22 VSUBSD %XMM2,%XMM7,%XMM7 |
(1308) 0x289d26 VMULSD 0x60(%RDI),%XMM5,%XMM9 |
(1308) 0x289d2b VMULSD 0x78(%RDI),%XMM5,%XMM5 |
(1308) 0x289d30 VFMADD231SD 0x50(%RDI),%XMM6,%XMM8 |
(1308) 0x289d36 VFMADD231SD 0x68(%RDI),%XMM6,%XMM9 |
(1308) 0x289d3c VFMADD231SD 0x80(%RDI),%XMM6,%XMM5 |
(1308) 0x289d45 VMOVDQA %XMM4,%XMM6 |
(1308) 0x289d49 VFMADD231SD 0x58(%RDI),%XMM7,%XMM8 |
(1308) 0x289d4f VFMADD231SD 0x70(%RDI),%XMM7,%XMM9 |
(1308) 0x289d55 VFMADD231SD 0x88(%RDI),%XMM7,%XMM5 |
(1308) 0x289d5e VMOVDQA %XMM4,%XMM7 |
(1308) 0x289d62 VPTERNLOGQ $-0x8,%XMM3,%XMM8,%XMM6 |
(1308) 0x289d69 VPTERNLOGQ $-0x8,%XMM3,%XMM9,%XMM7 |
(1308) 0x289d70 VADDSD %XMM6,%XMM8,%XMM6 |
(1308) 0x289d74 VADDSD %XMM7,%XMM9,%XMM7 |
(1308) 0x289d78 VROUNDSD $0xb,%XMM6,%XMM6,%XMM6 |
(1308) 0x289d7e VROUNDSD $0xb,%XMM7,%XMM7,%XMM7 |
(1308) 0x289d84 VSUBSD %XMM6,%XMM8,%XMM6 |
(1308) 0x289d88 VMOVDQA %XMM4,%XMM8 |
(1308) 0x289d8c VPTERNLOGQ $-0x8,%XMM3,%XMM5,%XMM8 |
(1308) 0x289d93 VSUBSD %XMM7,%XMM9,%XMM7 |
(1308) 0x289d97 VADDSD %XMM5,%XMM8,%XMM8 |
(1308) 0x289d9b VROUNDSD $0xb,%XMM8,%XMM8,%XMM8 |
(1308) 0x289da1 VSUBSD %XMM8,%XMM5,%XMM5 |
(1308) 0x289da6 VMULSD (%RDI),%XMM6,%XMM8 |
(1308) 0x289daa VFMADD231SD 0x8(%RDI),%XMM7,%XMM8 |
(1308) 0x289db0 VFMADD231SD 0x10(%RDI),%XMM5,%XMM8 |
(1308) 0x289db6 VMOVSD %XMM8,(%R8,%R14,8) |
(1308) 0x289dbc VMULSD 0x18(%RDI),%XMM6,%XMM8 |
(1308) 0x289dc1 VFMADD231SD 0x20(%RDI),%XMM7,%XMM8 |
(1308) 0x289dc7 VFMADD231SD 0x28(%RDI),%XMM5,%XMM8 |
(1308) 0x289dcd VMOVSD %XMM8,(%RBX,%R14,8) |
(1308) 0x289dd3 VMULSD 0x30(%RDI),%XMM6,%XMM6 |
(1308) 0x289dd8 VFMADD231SD 0x38(%RDI),%XMM7,%XMM6 |
(1308) 0x289dde VFMADD231SD 0x40(%RDI),%XMM5,%XMM6 |
(1308) 0x289de4 VMOVSD %XMM6,(%R11,%R14,8) |
(1308) 0x289dea VMOVSD (%R8,%R14,8),%XMM5 |
(1308) 0x289df0 VMOVSD (%RBX,%R14,8),%XMM7 |
(1308) 0x289df6 VMULSD %XMM5,%XMM5,%XMM5 |
(1308) 0x289dfa VFMADD231SD %XMM6,%XMM6,%XMM5 |
(1308) 0x289dff VFMADD231SD %XMM7,%XMM7,%XMM5 |
(1308) 0x289e04 VSQRTSD %XMM5,%XMM5,%XMM5 |
(1308) 0x289e08 VMOVSD %XMM5,(%RCX,%R14,8) |
(1308) 0x289e0e INC %R14D |
(1308) 0x289e11 CMP %R14D,%EAX |
(1308) 0x289e14 JNE 289d00 |
0x289e1a POP %RBX |
0x289e1b POP %R12 |
0x289e1d POP %R13 |
0x289e1f POP %R14 |
0x289e21 POP %R15 |
0x289e23 POP %RBP |
0x289e24 VZEROUPPER |
0x289e27 RET |
0x289e28 INT $0x3 |
0x289e29 INT $0x3 |
0x289e2a INT $0x3 |
0x289e2b INT $0x3 |
0x289e2c INT $0x3 |
0x289e2d INT $0x3 |
0x289e2e INT $0x3 |
0x289e2f INT $0x3 |
Path / |
Source file and lines | ParticleBConds3DSoa.h:221-257 |
Module | exec |
nb instructions | 80 |
nb uops | 69 |
loop length | 314 |
used x86 registers | 14 |
used mmx registers | 0 |
used xmm registers | 5 |
used ymm registers | 5 |
used zmm registers | 5 |
nb stack references | 1 |
micro-operation queue | 11.50 cycles |
front end | 11.50 cycles |
ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 5.75 | 5.75 | 5.75 | 5.75 | 4.00 | 4.67 | 4.67 | 4.67 | 0.00 | 3.00 | 3.00 | 0.00 | 0.00 | 0.00 |
cycles | 5.75 | 5.75 | 5.75 | 5.75 | 4.00 | 4.67 | 4.67 | 4.67 | 0.00 | 3.00 | 3.00 | 0.00 | 0.00 | 0.00 |
Cycles executing div or sqrt instructions | NA |
Front-end | 11.50 |
Dispatch | 5.75 |
Overall L1 | 11.50 |
all | 6% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 7% |
all | 0% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
all | 4% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 5% |
all | 10% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 10% |
all | 12% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 12% |
all | 10% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 10% |
Instruction | Nb FU | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV 0x10(%RBP),%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %EAX,%R15D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SUB %R9D,%R15D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JLE 289e1a <_ZNK11qmcplusplus10DTD_BCondsIdLj3ELi40EE16computeDistancesINS_10TinyVectorIdLj3EEENS_18VectorSoAContainerIdLj3ENS_10MallocatorIdLm64EEEEES8_EEvRKT_RKT0_PdRT1_iii+0x4ca> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
VMOVSD (%RSI),%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVSD 0x8(%RSI),%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVSD 0x10(%RSI),%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV 0x8(%RDX),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x18(%RDX),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x8(%R8),%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x18(%R8),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
XOR %R14D,%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
LEA (%RDX,%RSI,8),%R10 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SAL $0x4,%RSI | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
LEA (%R8,%R11,8),%RBX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SAL $0x4,%R11 | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
ADD %RDX,%RSI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD %R8,%R11 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CMP $0x4,%R15D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JB 289cde <_ZNK11qmcplusplus10DTD_BCondsIdLj3ELi40EE16computeDistancesINS_10TinyVectorIdLj3EEENS_18VectorSoAContainerIdLj3ENS_10MallocatorIdLm64EEEEES8_EEvRKT_RKT0_PdRT1_iii+0x38e> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
LEA -0x1(%RAX),%R12D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CMP %R9D,%R12D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JL 289cde <_ZNK11qmcplusplus10DTD_BCondsIdLj3ELi40EE16computeDistancesINS_10TinyVectorIdLj3EEENS_18VectorSoAContainerIdLj3ENS_10MallocatorIdLm64EEEEES8_EEvRKT_RKT0_PdRT1_iii+0x38e> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
XOR %R14D,%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
CMP $0x8,%R15D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JB 289b6a <_ZNK11qmcplusplus10DTD_BCondsIdLj3ELi40EE16computeDistancesINS_10TinyVectorIdLj3EEENS_18VectorSoAContainerIdLj3ENS_10MallocatorIdLm64EEEEES8_EEvRKT_RKT0_PdRT1_iii+0x21a> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
VPBROADCASTQ -0x85741(%RIP),%ZMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | 0.50 |
VPBROADCASTQ -0x858a3(%RIP),%ZMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | 0.50 |
VBROADCASTSD %XMM0,%ZMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 1 |
VBROADCASTSD %XMM1,%ZMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 1 |
VBROADCASTSD %XMM2,%ZMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 1 |
MOV %R15D,%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
AND $-0x8,%R14D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %R9D,%R13D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R14D,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW (%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
CMP %R14D,%R15D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JE 289e1a <_ZNK11qmcplusplus10DTD_BCondsIdLj3ELi40EE16computeDistancesINS_10TinyVectorIdLj3EEENS_18VectorSoAContainerIdLj3ENS_10MallocatorIdLm64EEEEES8_EEvRKT_RKT0_PdRT1_iii+0x4ca> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
TEST $0x4,%R15B | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JE 289cde <_ZNK11qmcplusplus10DTD_BCondsIdLj3ELi40EE16computeDistancesINS_10TinyVectorIdLj3EEENS_18VectorSoAContainerIdLj3ENS_10MallocatorIdLm64EEEEES8_EEvRKT_RKT0_PdRT1_iii+0x38e> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
VPBROADCASTQ -0x858e3(%RIP),%YMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VPBROADCASTQ -0x85a44(%RIP),%YMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %R14D,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R15D,%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
AND $-0x4,%R14D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
VBROADCASTSD %XMM0,%YMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 |
VBROADCASTSD %XMM1,%YMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 |
VBROADCASTSD %XMM2,%YMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 |
LEA (%R12,%R9,1),%R13D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SUB %R14D,%R12D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
NOPL (%RAX) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
CMP %R14D,%R15D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JE 289e1a <_ZNK11qmcplusplus10DTD_BCondsIdLj3ELi40EE16computeDistancesINS_10TinyVectorIdLj3EEENS_18VectorSoAContainerIdLj3ENS_10MallocatorIdLm64EEEEES8_EEvRKT_RKT0_PdRT1_iii+0x4ca> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
VPBROADCASTQ -0x85a57(%RIP),%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VPBROADCASTQ -0x85bb8(%RIP),%XMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
ADD %R9D,%R14D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
POP %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VZEROUPPER | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
RET | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 |
Source file and lines | ParticleBConds3DSoa.h:221-257 |
Module | exec |
nb instructions | 80 |
nb uops | 69 |
loop length | 314 |
used x86 registers | 14 |
used mmx registers | 0 |
used xmm registers | 5 |
used ymm registers | 5 |
used zmm registers | 5 |
nb stack references | 1 |
micro-operation queue | 11.50 cycles |
front end | 11.50 cycles |
ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 5.75 | 5.75 | 5.75 | 5.75 | 4.00 | 4.67 | 4.67 | 4.67 | 0.00 | 3.00 | 3.00 | 0.00 | 0.00 | 0.00 |
cycles | 5.75 | 5.75 | 5.75 | 5.75 | 4.00 | 4.67 | 4.67 | 4.67 | 0.00 | 3.00 | 3.00 | 0.00 | 0.00 | 0.00 |
Cycles executing div or sqrt instructions | NA |
Front-end | 11.50 |
Dispatch | 5.75 |
Overall L1 | 11.50 |
all | 6% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 7% |
all | 0% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
all | 4% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 5% |
all | 10% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 10% |
all | 12% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 12% |
all | 10% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 10% |
Instruction | Nb FU | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV 0x10(%RBP),%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %EAX,%R15D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SUB %R9D,%R15D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JLE 289e1a <_ZNK11qmcplusplus10DTD_BCondsIdLj3ELi40EE16computeDistancesINS_10TinyVectorIdLj3EEENS_18VectorSoAContainerIdLj3ENS_10MallocatorIdLm64EEEEES8_EEvRKT_RKT0_PdRT1_iii+0x4ca> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
VMOVSD (%RSI),%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVSD 0x8(%RSI),%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVSD 0x10(%RSI),%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV 0x8(%RDX),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x18(%RDX),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x8(%R8),%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x18(%R8),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
XOR %R14D,%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
LEA (%RDX,%RSI,8),%R10 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SAL $0x4,%RSI | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
LEA (%R8,%R11,8),%RBX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SAL $0x4,%R11 | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
ADD %RDX,%RSI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD %R8,%R11 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CMP $0x4,%R15D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JB 289cde <_ZNK11qmcplusplus10DTD_BCondsIdLj3ELi40EE16computeDistancesINS_10TinyVectorIdLj3EEENS_18VectorSoAContainerIdLj3ENS_10MallocatorIdLm64EEEEES8_EEvRKT_RKT0_PdRT1_iii+0x38e> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
LEA -0x1(%RAX),%R12D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CMP %R9D,%R12D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JL 289cde <_ZNK11qmcplusplus10DTD_BCondsIdLj3ELi40EE16computeDistancesINS_10TinyVectorIdLj3EEENS_18VectorSoAContainerIdLj3ENS_10MallocatorIdLm64EEEEES8_EEvRKT_RKT0_PdRT1_iii+0x38e> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
XOR %R14D,%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
CMP $0x8,%R15D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JB 289b6a <_ZNK11qmcplusplus10DTD_BCondsIdLj3ELi40EE16computeDistancesINS_10TinyVectorIdLj3EEENS_18VectorSoAContainerIdLj3ENS_10MallocatorIdLm64EEEEES8_EEvRKT_RKT0_PdRT1_iii+0x21a> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
VPBROADCASTQ -0x85741(%RIP),%ZMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | 0.50 |
VPBROADCASTQ -0x858a3(%RIP),%ZMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | 0.50 |
VBROADCASTSD %XMM0,%ZMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 1 |
VBROADCASTSD %XMM1,%ZMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 1 |
VBROADCASTSD %XMM2,%ZMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 1 |
MOV %R15D,%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
AND $-0x8,%R14D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %R9D,%R13D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R14D,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW (%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
CMP %R14D,%R15D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JE 289e1a <_ZNK11qmcplusplus10DTD_BCondsIdLj3ELi40EE16computeDistancesINS_10TinyVectorIdLj3EEENS_18VectorSoAContainerIdLj3ENS_10MallocatorIdLm64EEEEES8_EEvRKT_RKT0_PdRT1_iii+0x4ca> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
TEST $0x4,%R15B | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JE 289cde <_ZNK11qmcplusplus10DTD_BCondsIdLj3ELi40EE16computeDistancesINS_10TinyVectorIdLj3EEENS_18VectorSoAContainerIdLj3ENS_10MallocatorIdLm64EEEEES8_EEvRKT_RKT0_PdRT1_iii+0x38e> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
VPBROADCASTQ -0x858e3(%RIP),%YMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VPBROADCASTQ -0x85a44(%RIP),%YMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %R14D,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R15D,%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
AND $-0x4,%R14D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
VBROADCASTSD %XMM0,%YMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 |
VBROADCASTSD %XMM1,%YMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 |
VBROADCASTSD %XMM2,%YMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 |
LEA (%R12,%R9,1),%R13D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SUB %R14D,%R12D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
NOPL (%RAX) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
CMP %R14D,%R15D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JE 289e1a <_ZNK11qmcplusplus10DTD_BCondsIdLj3ELi40EE16computeDistancesINS_10TinyVectorIdLj3EEENS_18VectorSoAContainerIdLj3ENS_10MallocatorIdLm64EEEEES8_EEvRKT_RKT0_PdRT1_iii+0x4ca> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
VPBROADCASTQ -0x85a57(%RIP),%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VPBROADCASTQ -0x85bb8(%RIP),%XMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
ADD %R9D,%R14D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
POP %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VZEROUPPER | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
RET | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼_ZNK11qmcplusplus10DTD_BCondsIdLj3ELi40EE16computeDistancesINS_10TinyVectorIdLj3EEENS_18VectorSoAContainerIdLj3ENS_10MallocatorIdLm64EEEEES8_EEvRKT_RKT0_PdRT1_iii– | 0.67 | 1.4 |
○Loop 1310 - ParticleBConds3DSoa.h:234-255 - exec | 0.67 | 1.27 |
○Loop 1309 - ParticleBConds3DSoa.h:234-255 - exec | 0 | 0 |
○Loop 1308 - ParticleBConds3DSoa.h:234-255 - exec | 0 | 0 |