Function: _ZNK11qmcplusplus14BsplineFunctorIdE9evaluateVEiiiPKdPd | Module: libqmcwfs.so | Source: BsplineFunctor.h:228-262 [...] | Coverage: 1.47% |
---|
Function: _ZNK11qmcplusplus14BsplineFunctorIdE9evaluateVEiiiPKdPd | Module: libqmcwfs.so | Source: BsplineFunctor.h:228-262 [...] | Coverage: 1.47% |
---|
/usr/lib/gcc/x86_64-redhat-linux/11/../../../../include/c++/11/bits/stl_vector.h: 1064 - 1064 |
-------------------------------------------------------------------------------- |
1064: return *(this->_M_impl._M_start + __n); |
/beegfs/hackathon/users/eoseret/qaas_runs/170-855-3059/intel/miniqmc/build/miniqmc/src/QMCWaveFunctions/Jastrow/BsplineFunctor.h: 228 - 262 |
-------------------------------------------------------------------------------- |
228: { |
[...] |
236: for (int jat = 0; jat < iLimit; jat++) |
237: { |
238: real_type r = distArray[jat]; |
239: // pick the distances smaller than the cutoff and avoid the reference atom |
240: if (r < cutoff_radius && iStart + jat != iat) |
241: distArrayCompressed[iCount++] = distArray[jat]; |
242: } |
243: |
244: real_type d = 0.0; |
245: //#pragma omp simd reduction(+:d) |
246: for (int jat = 0; jat < iCount; jat++) |
247: { |
248: real_type r = distArrayCompressed[jat]; |
249: r *= DeltaRInv; |
250: int i = (int)r; |
251: real_type t = r - real_type(i); |
252: real_type tp0 = t * t * t; |
253: real_type tp1 = t * t; |
254: real_type tp2 = t; |
255: |
256: real_type d1 = SplineCoefs[i + 0] * (A[0] * tp0 + A[1] * tp1 + A[2] * tp2 + A[3]); |
257: real_type d2 = SplineCoefs[i + 1] * (A[4] * tp0 + A[5] * tp1 + A[6] * tp2 + A[7]); |
258: real_type d3 = SplineCoefs[i + 2] * (A[8] * tp0 + A[9] * tp1 + A[10] * tp2 + A[11]); |
259: real_type d4 = SplineCoefs[i + 3] * (A[12] * tp0 + A[13] * tp1 + A[14] * tp2 + A[15]); |
260: d += (d1 + d2 + d3 + d4); |
261: } |
262: return d; |
0x30ac0 PUSH %RBP |
0x30ac1 MOV %RSP,%RBP |
0x30ac4 PUSH %R15 |
0x30ac6 PUSH %R14 |
0x30ac8 PUSH %R13 |
0x30aca PUSH %R12 |
0x30acc PUSH %RBX |
0x30acd AND $-0x20,%RSP |
0x30ad1 SUB $0x140,%RSP |
0x30ad8 SUB %EDX,%ECX |
0x30ada VXORPD %XMM0,%XMM0,%XMM0 |
0x30ade JLE 31160 |
0x30ae4 VMOVSD 0x8(%RDI),%XMM1 |
0x30ae9 MOVSXD %EDX,%RDX |
0x30aec MOVSXD %ESI,%RSI |
0x30aef MOV %ECX,%ECX |
0x30af1 LEA (%R8,%RDX,8),%RAX |
0x30af5 SUB %RDX,%RSI |
0x30af8 XOR %R8D,%R8D |
0x30afb XOR %EDX,%EDX |
0x30afd JMP 30b08 |
0x30aff NOP |
(449) 0x30b00 INC %R8 |
(449) 0x30b03 CMP %R8,%RCX |
(449) 0x30b06 JE 30b26 |
(449) 0x30b08 CMP %R8,%RSI |
(449) 0x30b0b JE 30b00 |
(449) 0x30b0d VMOVSD (%RAX,%R8,8),%XMM2 |
(449) 0x30b13 VUCOMISD %XMM1,%XMM2 |
(449) 0x30b17 JAE 30b00 |
(449) 0x30b19 MOVSXD %EDX,%R10 |
(449) 0x30b1c INC %EDX |
(449) 0x30b1e VMOVSD %XMM2,(%R9,%R10,8) |
(449) 0x30b24 JMP 30b00 |
0x30b26 TEST %EDX,%EDX |
0x30b28 JLE 31160 |
0x30b2e VMOVSD 0x238(%RDI),%XMM12 |
0x30b36 VMOVSD 0x18(%RDI),%XMM13 |
0x30b3b VMOVUPD 0x20(%RDI),%XMM14 |
0x30b40 VMOVSD 0x30(%RDI),%XMM15 |
0x30b45 VMOVSD 0x38(%RDI),%XMM2 |
0x30b4a VMOVSD 0x40(%RDI),%XMM3 |
0x30b4f VMOVSD 0x48(%RDI),%XMM4 |
0x30b54 VMOVSD 0x58(%RDI),%XMM5 |
0x30b59 VMOVSD 0x60(%RDI),%XMM8 |
0x30b5e VMOVSD 0x68(%RDI),%XMM6 |
0x30b63 VMOVSD 0x50(%RDI),%XMM7 |
0x30b68 VMOVSD 0x70(%RDI),%XMM9 |
0x30b6d VMOVSD 0x78(%RDI),%XMM16 |
0x30b74 VMOVSD 0x80(%RDI),%XMM30 |
0x30b7b VMOVSD 0x88(%RDI),%XMM31 |
0x30b82 VMOVSD 0x90(%RDI),%XMM17 |
0x30b89 MOV 0x218(%RDI),%RAX |
0x30b90 MOV %EDX,%ECX |
0x30b92 CMP $0x8,%EDX |
0x30b95 JAE 30ba2 |
0x30b97 VXORPD %XMM0,%XMM0,%XMM0 |
0x30b9b XOR %EDX,%EDX |
0x30b9d JMP 31095 |
0x30ba2 VMOVAPD %XMM12,0xf0(%RSP) |
0x30bab VBROADCASTSD %XMM12,%YMM0 |
0x30bb0 VPBROADCASTD -0x27361(%RIP),%XMM11 |
0x30bb9 VPBROADCASTD -0x2737a(%RIP),%XMM12 |
0x30bc2 VMOVAPD %XMM17,%XMM1 |
0x30bc8 MOV %RCX,%RSI |
0x30bcb SHR $0x3,%RSI |
0x30bcf VMOVAPD %XMM30,0xb0(%RSP) |
0x30bd7 VMOVAPD %XMM31,0xa0(%RSP) |
0x30bdf VBROADCASTSD %XMM13,%YMM17 |
0x30be5 VBROADCASTSD %XMM14,%YMM18 |
0x30beb VPERMPD $0x55,%YMM14,%YMM19 |
0x30bf2 VBROADCASTSD %XMM15,%YMM20 |
0x30bf8 VBROADCASTSD %XMM2,%YMM21 |
0x30bfe VBROADCASTSD %XMM3,%YMM22 |
0x30c04 VBROADCASTSD %XMM4,%YMM23 |
0x30c0a VBROADCASTSD %XMM7,%YMM24 |
0x30c10 VBROADCASTSD %XMM5,%YMM25 |
0x30c16 VBROADCASTSD %XMM8,%YMM26 |
0x30c1c VBROADCASTSD %XMM6,%YMM27 |
0x30c22 VMOVAPD %XMM9,0x10(%RSP) |
0x30c28 VBROADCASTSD %XMM9,%YMM28 |
0x30c2e VBROADCASTSD %XMM16,%YMM29 |
0x30c34 VBROADCASTSD %XMM30,%YMM30 |
0x30c3a VBROADCASTSD %XMM31,%YMM31 |
0x30c40 VBROADCASTSD %XMM1,%YMM9 |
0x30c45 MOV %ECX,%EDX |
0x30c47 VMOVAPD %XMM15,0xd0(%RSP) |
0x30c50 VMOVAPD %XMM1,0x90(%RSP) |
0x30c59 AND $-0x8,%EDX |
0x30c5c VXORPD %XMM1,%XMM1,%XMM1 |
0x30c60 VXORPD %XMM15,%XMM15,%XMM15 |
0x30c65 VMOVAPD %XMM13,0xe0(%RSP) |
0x30c6e VMOVAPD %YMM14,0x100(%RSP) |
0x30c77 VMOVAPD %XMM2,0x80(%RSP) |
0x30c80 VMOVAPD %XMM3,0x70(%RSP) |
0x30c86 VMOVAPD %XMM4,0x60(%RSP) |
0x30c8c VMOVAPD %XMM7,0x30(%RSP) |
0x30c92 VMOVAPD %XMM5,0x50(%RSP) |
0x30c98 VMOVAPD %XMM8,0x20(%RSP) |
0x30c9e VMOVAPD %XMM6,0x40(%RSP) |
0x30ca4 VMOVAPD %XMM16,0xc0(%RSP) |
0x30cac XOR %EDI,%EDI |
0x30cae SAL $0x6,%RSI |
0x30cb2 NOPW %CS:(%RAX,%RAX,1) |
(447) 0x30cc0 VMULPD (%R9,%RDI,1),%YMM0,%YMM2 |
(447) 0x30cc6 VMULPD 0x20(%R9,%RDI,1),%YMM0,%YMM5 |
(447) 0x30ccd ADD $0x40,%RDI |
(447) 0x30cd1 VCVTTPD2DQ %YMM2,%XMM10 |
(447) 0x30cd5 VCVTTPD2DQ %YMM5,%XMM4 |
(447) 0x30cd9 VROUNDPD $0xb,%YMM2,%YMM6 |
(447) 0x30cdf VROUNDPD $0xb,%YMM5,%YMM13 |
(447) 0x30ce5 VPMOVSXDQ %XMM10,%YMM7 |
(447) 0x30cea VPMOVSXDQ %XMM4,%YMM8 |
(447) 0x30cef VSUBPD %YMM6,%YMM2,%YMM16 |
(447) 0x30cf5 VSUBPD %YMM13,%YMM5,%YMM13 |
(447) 0x30cfa VPCMPEQD %XMM6,%XMM6,%XMM6 |
(447) 0x30cfe VMOVQ %XMM7,%R10 |
(447) 0x30d03 VPEXTRQ $0x1,%XMM7,%R11 |
(447) 0x30d09 VEXTRACTI128 $0x1,%YMM7,%XMM7 |
(447) 0x30d0f VMOVQ %XMM8,%R8 |
(447) 0x30d14 VPEXTRQ $0x1,%XMM8,%R15 |
(447) 0x30d1a VPEXTRQ $0x1,%XMM7,%RBX |
(447) 0x30d20 VMOVQ %XMM7,%R14 |
(447) 0x30d25 VEXTRACTI128 $0x1,%YMM8,%XMM7 |
(447) 0x30d2b VMOVSD (%RAX,%R10,8),%XMM8 |
(447) 0x30d31 VMULPD %YMM16,%YMM16,%YMM2 |
(447) 0x30d37 VMULPD %YMM13,%YMM13,%YMM14 |
(447) 0x30d3c VMOVQ %XMM7,%R12 |
(447) 0x30d41 VPEXTRQ $0x1,%XMM7,%R13 |
(447) 0x30d47 VMOVSD (%RAX,%R14,8),%XMM7 |
(447) 0x30d4d VMOVHPD (%RAX,%R11,8),%XMM8,%XMM8 |
(447) 0x30d53 VMOVHPD (%RAX,%RBX,8),%XMM7,%XMM7 |
(447) 0x30d58 VMULPD %YMM16,%YMM2,%YMM5 |
(447) 0x30d5e VMULPD %YMM13,%YMM14,%YMM3 |
(447) 0x30d63 VINSERTF128 $0x1,%XMM7,%YMM8,%YMM7 |
(447) 0x30d69 VMOVAPD %YMM18,%YMM8 |
(447) 0x30d6f VFMADD213PD %YMM20,%YMM2,%YMM8 |
(447) 0x30d75 VFMADD231PD %YMM16,%YMM19,%YMM8 |
(447) 0x30d7b VFMADD231PD %YMM17,%YMM5,%YMM8 |
(447) 0x30d81 VFMADD213PD %YMM1,%YMM7,%YMM8 |
(447) 0x30d86 VMOVSD (%RAX,%R12,8),%XMM1 |
(447) 0x30d8c VMOVSD (%RAX,%R8,8),%XMM7 |
(447) 0x30d92 VMOVHPD (%RAX,%R13,8),%XMM1,%XMM1 |
(447) 0x30d98 VMOVHPD (%RAX,%R15,8),%XMM7,%XMM7 |
(447) 0x30d9e VINSERTF128 $0x1,%XMM1,%YMM7,%YMM1 |
(447) 0x30da4 VMOVAPD %YMM18,%YMM7 |
(447) 0x30daa VFMADD213PD %YMM20,%YMM14,%YMM7 |
(447) 0x30db0 VFMADD231PD %YMM13,%YMM19,%YMM7 |
(447) 0x30db6 VFMADD231PD %YMM17,%YMM3,%YMM7 |
(447) 0x30dbc VFMADD213PD %YMM15,%YMM1,%YMM7 |
(447) 0x30dc1 VPSUBD %XMM6,%XMM10,%XMM1 |
(447) 0x30dc5 VPSUBD %XMM6,%XMM4,%XMM15 |
(447) 0x30dc9 VMOVAPD %YMM3,%YMM6 |
(447) 0x30dcd VFMADD213PD %YMM28,%YMM25,%YMM6 |
(447) 0x30dd3 VPMOVSXDQ %XMM1,%YMM1 |
(447) 0x30dd8 VPMOVSXDQ %XMM15,%YMM15 |
(447) 0x30ddd VMOVQ %XMM1,%R10 |
(447) 0x30de2 VPEXTRQ $0x1,%XMM1,%R11 |
(447) 0x30de8 VEXTRACTI128 $0x1,%YMM1,%XMM1 |
(447) 0x30dee VMOVQ %XMM15,%R8 |
(447) 0x30df3 VPEXTRQ $0x1,%XMM15,%R15 |
(447) 0x30df9 VMOVQ %XMM1,%RBX |
(447) 0x30dfe VPEXTRQ $0x1,%XMM1,%R14 |
(447) 0x30e04 VEXTRACTI128 $0x1,%YMM15,%XMM1 |
(447) 0x30e0a VMOVSD (%RAX,%R10,8),%XMM15 |
(447) 0x30e10 VFMADD231PD %YMM14,%YMM26,%YMM6 |
(447) 0x30e16 VMOVQ %XMM1,%R12 |
(447) 0x30e1b VPEXTRQ $0x1,%XMM1,%R13 |
(447) 0x30e21 VMOVSD (%RAX,%RBX,8),%XMM1 |
(447) 0x30e26 VMOVHPD (%RAX,%R11,8),%XMM15,%XMM15 |
(447) 0x30e2c VMOVHPD (%RAX,%R14,8),%XMM1,%XMM1 |
(447) 0x30e32 VFMADD231PD %YMM13,%YMM27,%YMM6 |
(447) 0x30e38 VINSERTF128 $0x1,%XMM1,%YMM15,%YMM1 |
(447) 0x30e3e VMOVAPD %YMM5,%YMM15 |
(447) 0x30e42 VFMADD213PD %YMM24,%YMM21,%YMM15 |
(447) 0x30e48 VFMADD231PD %YMM2,%YMM22,%YMM15 |
(447) 0x30e4e VFMADD231PD %YMM16,%YMM23,%YMM15 |
(447) 0x30e54 VFMADD213PD %YMM8,%YMM1,%YMM15 |
(447) 0x30e59 VMOVSD (%RAX,%R12,8),%XMM1 |
(447) 0x30e5f VMOVSD (%RAX,%R8,8),%XMM8 |
(447) 0x30e65 VMOVHPD (%RAX,%R13,8),%XMM1,%XMM1 |
(447) 0x30e6b VMOVHPD (%RAX,%R15,8),%XMM8,%XMM8 |
(447) 0x30e71 VINSERTF128 $0x1,%XMM1,%YMM8,%YMM1 |
(447) 0x30e77 VMOVAPD %YMM3,%YMM8 |
(447) 0x30e7b VFMADD213PD %YMM24,%YMM21,%YMM8 |
(447) 0x30e81 VFMADD213PD %YMM9,%YMM29,%YMM3 |
(447) 0x30e87 VFMADD231PD %YMM14,%YMM22,%YMM8 |
(447) 0x30e8d VFMADD231PD %YMM14,%YMM30,%YMM3 |
(447) 0x30e93 VFMADD231PD %YMM13,%YMM23,%YMM8 |
(447) 0x30e99 VFMADD231PD %YMM13,%YMM31,%YMM3 |
(447) 0x30e9f VFMADD213PD %YMM7,%YMM1,%YMM8 |
(447) 0x30ea4 VPADDD %XMM11,%XMM10,%XMM1 |
(447) 0x30ea9 VPADDD %XMM4,%XMM11,%XMM7 |
(447) 0x30ead VPADDD %XMM4,%XMM12,%XMM4 |
(447) 0x30eb1 VPMOVSXDQ %XMM1,%YMM1 |
(447) 0x30eb6 VPMOVSXDQ %XMM7,%YMM7 |
(447) 0x30ebb VPMOVSXDQ %XMM4,%YMM4 |
(447) 0x30ec0 VPEXTRQ $0x1,%XMM1,%R10 |
(447) 0x30ec6 VMOVQ %XMM1,%R11 |
(447) 0x30ecb VEXTRACTI128 $0x1,%YMM1,%XMM1 |
(447) 0x30ed1 VMOVQ %XMM7,%R8 |
(447) 0x30ed6 VPEXTRQ $0x1,%XMM7,%R15 |
(447) 0x30edc VMOVQ %XMM1,%RBX |
(447) 0x30ee1 VPEXTRQ $0x1,%XMM1,%R14 |
(447) 0x30ee7 VEXTRACTI128 $0x1,%YMM7,%XMM1 |
(447) 0x30eed VMOVSD (%RAX,%R11,8),%XMM7 |
(447) 0x30ef3 VMOVQ %XMM1,%R12 |
(447) 0x30ef8 VPEXTRQ $0x1,%XMM1,%R13 |
(447) 0x30efe VMOVSD (%RAX,%RBX,8),%XMM1 |
(447) 0x30f03 VMOVHPD (%RAX,%R10,8),%XMM7,%XMM7 |
(447) 0x30f09 VPEXTRQ $0x1,%XMM4,%R10 |
(447) 0x30f0f VMOVHPD (%RAX,%R14,8),%XMM1,%XMM1 |
(447) 0x30f15 VINSERTF128 $0x1,%XMM1,%YMM7,%YMM1 |
(447) 0x30f1b VMOVAPD %YMM5,%YMM7 |
(447) 0x30f1f VFMADD213PD %YMM28,%YMM25,%YMM7 |
(447) 0x30f25 VFMADD213PD %YMM9,%YMM29,%YMM5 |
(447) 0x30f2b VFMADD231PD %YMM2,%YMM26,%YMM7 |
(447) 0x30f31 VFMADD231PD %YMM2,%YMM30,%YMM5 |
(447) 0x30f37 VFMADD231PD %YMM16,%YMM27,%YMM7 |
(447) 0x30f3d VFMADD231PD %YMM16,%YMM31,%YMM5 |
(447) 0x30f43 VFMADD213PD %YMM15,%YMM1,%YMM7 |
(447) 0x30f48 VMOVSD (%RAX,%R8,8),%XMM15 |
(447) 0x30f4e VMOVSD (%RAX,%R12,8),%XMM1 |
(447) 0x30f54 VMOVQ %XMM4,%R8 |
(447) 0x30f59 VMOVHPD (%RAX,%R13,8),%XMM1,%XMM1 |
(447) 0x30f5f VMOVHPD (%RAX,%R15,8),%XMM15,%XMM15 |
(447) 0x30f65 VINSERTF128 $0x1,%XMM1,%YMM15,%YMM1 |
(447) 0x30f6b VMOVAPD %YMM3,%YMM15 |
(447) 0x30f6f VFMADD213PD %YMM8,%YMM1,%YMM6 |
(447) 0x30f74 VPADDD %XMM12,%XMM10,%XMM1 |
(447) 0x30f79 VPMOVSXDQ %XMM1,%YMM1 |
(447) 0x30f7e VMOVQ %XMM1,%R11 |
(447) 0x30f83 VPEXTRQ $0x1,%XMM1,%RBX |
(447) 0x30f89 VEXTRACTI128 $0x1,%YMM1,%XMM1 |
(447) 0x30f8f VPEXTRQ $0x1,%XMM1,%R14 |
(447) 0x30f95 VMOVQ %XMM1,%R15 |
(447) 0x30f9a VEXTRACTI128 $0x1,%YMM4,%XMM1 |
(447) 0x30fa0 VMOVSD (%RAX,%R11,8),%XMM4 |
(447) 0x30fa6 VMOVQ %XMM1,%R12 |
(447) 0x30fab VPEXTRQ $0x1,%XMM1,%R13 |
(447) 0x30fb1 VMOVSD (%RAX,%R15,8),%XMM1 |
(447) 0x30fb7 VMOVHPD (%RAX,%RBX,8),%XMM4,%XMM4 |
(447) 0x30fbc VMOVHPD (%RAX,%R14,8),%XMM1,%XMM1 |
(447) 0x30fc2 VMOVSD (%RAX,%R12,8),%XMM2 |
(447) 0x30fc8 VMOVHPD (%RAX,%R13,8),%XMM2,%XMM2 |
(447) 0x30fce VINSERTF128 $0x1,%XMM1,%YMM4,%YMM4 |
(447) 0x30fd4 VMOVAPD %YMM5,%YMM1 |
(447) 0x30fd8 VFMADD213PD %YMM7,%YMM4,%YMM1 |
(447) 0x30fdd VMOVSD (%RAX,%R8,8),%XMM4 |
(447) 0x30fe3 VMOVHPD (%RAX,%R10,8),%XMM4,%XMM4 |
(447) 0x30fe9 VINSERTF128 $0x1,%XMM2,%YMM4,%YMM2 |
(447) 0x30fef VFMADD213PD %YMM6,%YMM2,%YMM15 |
(447) 0x30ff4 CMP %RDI,%RSI |
(447) 0x30ff7 JNE 30cc0 |
0x30ffd VADDPD %YMM1,%YMM15,%YMM0 |
0x31001 VMOVAPD 0xf0(%RSP),%XMM12 |
0x3100a VMOVAPD 0xe0(%RSP),%XMM13 |
0x31013 VMOVAPD 0x100(%RSP),%YMM14 |
0x3101c VMOVAPD 0xd0(%RSP),%XMM15 |
0x31025 VMOVAPD 0xc0(%RSP),%XMM16 |
0x3102d VMOVAPD 0xb0(%RSP),%XMM30 |
0x31035 VMOVAPD 0xa0(%RSP),%XMM31 |
0x3103d VMOVAPD 0x90(%RSP),%XMM17 |
0x31045 VMOVAPD 0x80(%RSP),%XMM2 |
0x3104e VMOVAPD 0x70(%RSP),%XMM3 |
0x31054 VMOVAPD 0x60(%RSP),%XMM4 |
0x3105a VMOVAPD 0x50(%RSP),%XMM5 |
0x31060 VMOVAPD 0x40(%RSP),%XMM6 |
0x31066 VMOVAPD 0x30(%RSP),%XMM7 |
0x3106c VMOVAPD 0x20(%RSP),%XMM8 |
0x31072 VMOVAPD 0x10(%RSP),%XMM9 |
0x31078 VEXTRACTF128 $0x1,%YMM0,%XMM1 |
0x3107e VADDPD %XMM1,%XMM0,%XMM0 |
0x31082 VPERMILPD $0x1,%XMM0,%XMM1 |
0x31088 VADDSD %XMM1,%XMM0,%XMM0 |
0x3108c CMP %RCX,%RDX |
0x3108f JE 31160 |
0x31095 VUNPCKLPD %XMM8,%XMM2,%XMM1 |
0x3109a VUNPCKLPD %XMM5,%XMM3,%XMM2 |
0x3109e VUNPCKLPD %XMM6,%XMM4,%XMM4 |
0x310a2 VUNPCKLPD %XMM9,%XMM7,%XMM5 |
0x310a7 NOPW (%RAX,%RAX,1) |
(448) 0x310b0 VMULSD (%R9,%RDX,8),%XMM12,%XMM3 |
(448) 0x310b6 INC %RDX |
(448) 0x310b9 VROUNDSD $0xb,%XMM3,%XMM3,%XMM6 |
(448) 0x310bf VCVTTSD2SI %XMM3,%ESI |
(448) 0x310c3 MOVSXD %ESI,%RSI |
(448) 0x310c6 VSUBSD %XMM6,%XMM3,%XMM3 |
(448) 0x310ca VMULSD %XMM3,%XMM3,%XMM8 |
(448) 0x310ce VMOVDDUP %XMM3,%XMM6 |
(448) 0x310d2 VMULSD %XMM3,%XMM31,%XMM7 |
(448) 0x310d8 VUNPCKLPD %XMM6,%XMM8,%XMM9 |
(448) 0x310dc VMULSD %XMM3,%XMM8,%XMM3 |
(448) 0x310e0 VMULPD %XMM9,%XMM14,%XMM9 |
(448) 0x310e5 VUNPCKLPD %XMM8,%XMM3,%XMM11 |
(448) 0x310ea VPERMILPD $0x1,%XMM9,%XMM10 |
(448) 0x310f0 VADDSD %XMM9,%XMM10,%XMM9 |
(448) 0x310f5 VUNPCKLPD %XMM3,%XMM8,%XMM10 |
(448) 0x310f9 VFMADD213PD %XMM5,%XMM2,%XMM10 |
(448) 0x310fe VADDSD %XMM15,%XMM9,%XMM9 |
(448) 0x31103 VFMADD213PD %XMM10,%XMM1,%XMM11 |
(448) 0x31108 VFMADD231SD %XMM13,%XMM3,%XMM9 |
(448) 0x3110d VMULSD %XMM3,%XMM16,%XMM3 |
(448) 0x31113 VUNPCKLPD %XMM3,%XMM0,%XMM0 |
(448) 0x31117 VFMADD231PD %XMM6,%XMM4,%XMM11 |
(448) 0x3111c VMULPD 0x8(%RAX,%RSI,8),%XMM11,%XMM6 |
(448) 0x31122 VUNPCKLPD %XMM8,%XMM9,%XMM3 |
(448) 0x31127 VMOVSD (%RAX,%RSI,8),%XMM8 |
(448) 0x3112c VUNPCKLPD %XMM30,%XMM8,%XMM8 |
(448) 0x31132 VFMADD213PD %XMM0,%XMM3,%XMM8 |
(448) 0x31137 VUNPCKLPD %XMM7,%XMM6,%XMM0 |
(448) 0x3113b VSHUFPD $0x1,%XMM17,%XMM6,%XMM3 |
(448) 0x31142 VADDPD %XMM0,%XMM8,%XMM0 |
(448) 0x31146 VADDPD %XMM3,%XMM0,%XMM0 |
(448) 0x3114a VPERMILPD $0x1,%XMM0,%XMM3 |
(448) 0x31150 VFMADD231SD 0x18(%RAX,%RSI,8),%XMM3,%XMM0 |
(448) 0x31157 CMP %RDX,%RCX |
(448) 0x3115a JNE 310b0 |
0x31160 LEA -0x28(%RBP),%RSP |
0x31164 POP %RBX |
0x31165 POP %R12 |
0x31167 POP %R13 |
0x31169 POP %R14 |
0x3116b POP %R15 |
0x3116d POP %RBP |
0x3116e VZEROUPPER |
0x31171 RET |
0x31172 INT $0x3 |
0x31173 INT $0x3 |
0x31174 INT $0x3 |
0x31175 INT $0x3 |
0x31176 INT $0x3 |
0x31177 INT $0x3 |
0x31178 INT $0x3 |
0x31179 INT $0x3 |
0x3117a INT $0x3 |
0x3117b INT $0x3 |
0x3117c INT $0x3 |
0x3117d INT $0x3 |
0x3117e INT $0x3 |
0x3117f INT $0x3 |
Path / |
Source file and lines | BsplineFunctor.h:228-262 |
Module | libqmcwfs.so |
nb instructions | 143 |
nb uops | 126 |
loop length | 685 |
used x86 registers | 13 |
used mmx registers | 0 |
used xmm registers | 19 |
used ymm registers | 20 |
used zmm registers | 0 |
nb stack references | 17 |
micro-operation queue | 21.00 cycles |
front end | 21.00 cycles |
ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 4.50 | 4.50 | 4.25 | 4.25 | 3.50 | 17.33 | 17.33 | 17.33 | 1.00 | 8.50 | 8.50 | 8.00 | 8.00 | 8.00 |
cycles | 4.50 | 4.50 | 4.25 | 4.25 | 3.50 | 17.33 | 17.33 | 17.33 | 1.00 | 8.50 | 8.50 | 8.00 | 8.00 | 8.00 |
Cycles executing div or sqrt instructions | NA |
Front-end | 21.00 |
Dispatch | 17.33 |
Overall L1 | 21.00 |
all | 10% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 11% |
all | 53% |
load | 51% |
store | 100% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 66% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 28% |
all | 48% |
load | 48% |
store | 100% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 50% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 24% |
all | 11% |
load | 6% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 11% |
all | 20% |
load | 19% |
store | 26% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 29% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 16% |
all | 19% |
load | 18% |
store | 26% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 25% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 15% |
Instruction | Nb FU | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
AND $-0x20,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SUB $0x140,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SUB %EDX,%ECX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
VXORPD %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JLE 31160 <_ZNK11qmcplusplus14BsplineFunctorIdE9evaluateVEiiiPKdPd+0x6a0> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
VMOVSD 0x8(%RDI),%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOVSXD %EDX,%RDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOVSXD %ESI,%RSI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %ECX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA (%R8,%RDX,8),%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SUB %RDX,%RSI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
XOR %R8D,%R8D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
JMP 30b08 <_ZNK11qmcplusplus14BsplineFunctorIdE9evaluateVEiiiPKdPd+0x48> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
NOP | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
TEST %EDX,%EDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JLE 31160 <_ZNK11qmcplusplus14BsplineFunctorIdE9evaluateVEiiiPKdPd+0x6a0> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
VMOVSD 0x238(%RDI),%XMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVSD 0x18(%RDI),%XMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVUPD 0x20(%RDI),%XMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVSD 0x30(%RDI),%XMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVSD 0x38(%RDI),%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVSD 0x40(%RDI),%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVSD 0x48(%RDI),%XMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVSD 0x58(%RDI),%XMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVSD 0x60(%RDI),%XMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVSD 0x68(%RDI),%XMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVSD 0x50(%RDI),%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVSD 0x70(%RDI),%XMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVSD 0x78(%RDI),%XMM16 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVSD 0x80(%RDI),%XMM30 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVSD 0x88(%RDI),%XMM31 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVSD 0x90(%RDI),%XMM17 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV 0x218(%RDI),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %EDX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP $0x8,%EDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JAE 30ba2 <_ZNK11qmcplusplus14BsplineFunctorIdE9evaluateVEiiiPKdPd+0xe2> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
VXORPD %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
JMP 31095 <_ZNK11qmcplusplus14BsplineFunctorIdE9evaluateVEiiiPKdPd+0x5d5> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
VMOVAPD %XMM12,0xf0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 |
VBROADCASTSD %XMM12,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 |
VPBROADCASTD -0x27361(%RIP),%XMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VPBROADCASTD -0x2737a(%RIP),%XMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVAPD %XMM17,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RCX,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SHR $0x3,%RSI | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVAPD %XMM30,0xb0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 |
VMOVAPD %XMM31,0xa0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 |
VBROADCASTSD %XMM13,%YMM17 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 |
VBROADCASTSD %XMM14,%YMM18 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 |
VPERMPD $0x55,%YMM14,%YMM19 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 4 | 0.50 |
VBROADCASTSD %XMM15,%YMM20 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 |
VBROADCASTSD %XMM2,%YMM21 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 |
VBROADCASTSD %XMM3,%YMM22 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 |
VBROADCASTSD %XMM4,%YMM23 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 |
VBROADCASTSD %XMM7,%YMM24 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 |
VBROADCASTSD %XMM5,%YMM25 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 |
VBROADCASTSD %XMM8,%YMM26 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 |
VBROADCASTSD %XMM6,%YMM27 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 |
VMOVAPD %XMM9,0x10(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 |
VBROADCASTSD %XMM9,%YMM28 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 |
VBROADCASTSD %XMM16,%YMM29 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 |
VBROADCASTSD %XMM30,%YMM30 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 |
VBROADCASTSD %XMM31,%YMM31 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 |
VBROADCASTSD %XMM1,%YMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 |
MOV %ECX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVAPD %XMM15,0xd0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 |
VMOVAPD %XMM1,0x90(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 |
AND $-0x8,%EDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
VXORPD %XMM1,%XMM1,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VXORPD %XMM15,%XMM15,%XMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVAPD %XMM13,0xe0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 |
VMOVAPD %YMM14,0x100(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 |
VMOVAPD %XMM2,0x80(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 |
VMOVAPD %XMM3,0x70(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 |
VMOVAPD %XMM4,0x60(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 |
VMOVAPD %XMM7,0x30(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 |
VMOVAPD %XMM5,0x50(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 |
VMOVAPD %XMM8,0x20(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 |
VMOVAPD %XMM6,0x40(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 |
VMOVAPD %XMM16,0xc0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 |
XOR %EDI,%EDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
SAL $0x6,%RSI | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
VADDPD %YMM1,%YMM15,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 |
VMOVAPD 0xf0(%RSP),%XMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVAPD 0xe0(%RSP),%XMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVAPD 0x100(%RSP),%YMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVAPD 0xd0(%RSP),%XMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVAPD 0xc0(%RSP),%XMM16 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVAPD 0xb0(%RSP),%XMM30 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVAPD 0xa0(%RSP),%XMM31 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVAPD 0x90(%RSP),%XMM17 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVAPD 0x80(%RSP),%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVAPD 0x70(%RSP),%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVAPD 0x60(%RSP),%XMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVAPD 0x50(%RSP),%XMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVAPD 0x40(%RSP),%XMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVAPD 0x30(%RSP),%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVAPD 0x20(%RSP),%XMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVAPD 0x10(%RSP),%XMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VEXTRACTF128 $0x1,%YMM0,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
VADDPD %XMM1,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 |
VPERMILPD $0x1,%XMM0,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 |
VADDSD %XMM1,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 |
CMP %RCX,%RDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JE 31160 <_ZNK11qmcplusplus14BsplineFunctorIdE9evaluateVEiiiPKdPd+0x6a0> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
VUNPCKLPD %XMM8,%XMM2,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 |
VUNPCKLPD %XMM5,%XMM3,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 |
VUNPCKLPD %XMM6,%XMM4,%XMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 |
VUNPCKLPD %XMM9,%XMM7,%XMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 |
NOPW (%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
LEA -0x28(%RBP),%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
POP %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VZEROUPPER | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
RET | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 |
Source file and lines | BsplineFunctor.h:228-262 |
Module | libqmcwfs.so |
nb instructions | 143 |
nb uops | 126 |
loop length | 685 |
used x86 registers | 13 |
used mmx registers | 0 |
used xmm registers | 19 |
used ymm registers | 20 |
used zmm registers | 0 |
nb stack references | 17 |
micro-operation queue | 21.00 cycles |
front end | 21.00 cycles |
ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 4.50 | 4.50 | 4.25 | 4.25 | 3.50 | 17.33 | 17.33 | 17.33 | 1.00 | 8.50 | 8.50 | 8.00 | 8.00 | 8.00 |
cycles | 4.50 | 4.50 | 4.25 | 4.25 | 3.50 | 17.33 | 17.33 | 17.33 | 1.00 | 8.50 | 8.50 | 8.00 | 8.00 | 8.00 |
Cycles executing div or sqrt instructions | NA |
Front-end | 21.00 |
Dispatch | 17.33 |
Overall L1 | 21.00 |
all | 10% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 11% |
all | 53% |
load | 51% |
store | 100% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 66% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 28% |
all | 48% |
load | 48% |
store | 100% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 50% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 24% |
all | 11% |
load | 6% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 11% |
all | 20% |
load | 19% |
store | 26% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 29% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 16% |
all | 19% |
load | 18% |
store | 26% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 25% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 15% |
Instruction | Nb FU | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
AND $-0x20,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SUB $0x140,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SUB %EDX,%ECX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
VXORPD %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JLE 31160 <_ZNK11qmcplusplus14BsplineFunctorIdE9evaluateVEiiiPKdPd+0x6a0> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
VMOVSD 0x8(%RDI),%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOVSXD %EDX,%RDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOVSXD %ESI,%RSI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %ECX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA (%R8,%RDX,8),%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SUB %RDX,%RSI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
XOR %R8D,%R8D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
JMP 30b08 <_ZNK11qmcplusplus14BsplineFunctorIdE9evaluateVEiiiPKdPd+0x48> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
NOP | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
TEST %EDX,%EDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JLE 31160 <_ZNK11qmcplusplus14BsplineFunctorIdE9evaluateVEiiiPKdPd+0x6a0> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
VMOVSD 0x238(%RDI),%XMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVSD 0x18(%RDI),%XMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVUPD 0x20(%RDI),%XMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVSD 0x30(%RDI),%XMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVSD 0x38(%RDI),%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVSD 0x40(%RDI),%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVSD 0x48(%RDI),%XMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVSD 0x58(%RDI),%XMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVSD 0x60(%RDI),%XMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVSD 0x68(%RDI),%XMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVSD 0x50(%RDI),%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVSD 0x70(%RDI),%XMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVSD 0x78(%RDI),%XMM16 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVSD 0x80(%RDI),%XMM30 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVSD 0x88(%RDI),%XMM31 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVSD 0x90(%RDI),%XMM17 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV 0x218(%RDI),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %EDX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP $0x8,%EDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JAE 30ba2 <_ZNK11qmcplusplus14BsplineFunctorIdE9evaluateVEiiiPKdPd+0xe2> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
VXORPD %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
JMP 31095 <_ZNK11qmcplusplus14BsplineFunctorIdE9evaluateVEiiiPKdPd+0x5d5> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
VMOVAPD %XMM12,0xf0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 |
VBROADCASTSD %XMM12,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 |
VPBROADCASTD -0x27361(%RIP),%XMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VPBROADCASTD -0x2737a(%RIP),%XMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVAPD %XMM17,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RCX,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SHR $0x3,%RSI | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVAPD %XMM30,0xb0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 |
VMOVAPD %XMM31,0xa0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 |
VBROADCASTSD %XMM13,%YMM17 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 |
VBROADCASTSD %XMM14,%YMM18 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 |
VPERMPD $0x55,%YMM14,%YMM19 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 4 | 0.50 |
VBROADCASTSD %XMM15,%YMM20 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 |
VBROADCASTSD %XMM2,%YMM21 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 |
VBROADCASTSD %XMM3,%YMM22 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 |
VBROADCASTSD %XMM4,%YMM23 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 |
VBROADCASTSD %XMM7,%YMM24 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 |
VBROADCASTSD %XMM5,%YMM25 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 |
VBROADCASTSD %XMM8,%YMM26 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 |
VBROADCASTSD %XMM6,%YMM27 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 |
VMOVAPD %XMM9,0x10(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 |
VBROADCASTSD %XMM9,%YMM28 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 |
VBROADCASTSD %XMM16,%YMM29 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 |
VBROADCASTSD %XMM30,%YMM30 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 |
VBROADCASTSD %XMM31,%YMM31 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 |
VBROADCASTSD %XMM1,%YMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 |
MOV %ECX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVAPD %XMM15,0xd0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 |
VMOVAPD %XMM1,0x90(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 |
AND $-0x8,%EDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
VXORPD %XMM1,%XMM1,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VXORPD %XMM15,%XMM15,%XMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVAPD %XMM13,0xe0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 |
VMOVAPD %YMM14,0x100(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 |
VMOVAPD %XMM2,0x80(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 |
VMOVAPD %XMM3,0x70(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 |
VMOVAPD %XMM4,0x60(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 |
VMOVAPD %XMM7,0x30(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 |
VMOVAPD %XMM5,0x50(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 |
VMOVAPD %XMM8,0x20(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 |
VMOVAPD %XMM6,0x40(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 |
VMOVAPD %XMM16,0xc0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 |
XOR %EDI,%EDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
SAL $0x6,%RSI | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
VADDPD %YMM1,%YMM15,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 |
VMOVAPD 0xf0(%RSP),%XMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVAPD 0xe0(%RSP),%XMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVAPD 0x100(%RSP),%YMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVAPD 0xd0(%RSP),%XMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVAPD 0xc0(%RSP),%XMM16 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVAPD 0xb0(%RSP),%XMM30 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVAPD 0xa0(%RSP),%XMM31 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVAPD 0x90(%RSP),%XMM17 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVAPD 0x80(%RSP),%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVAPD 0x70(%RSP),%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVAPD 0x60(%RSP),%XMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVAPD 0x50(%RSP),%XMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVAPD 0x40(%RSP),%XMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVAPD 0x30(%RSP),%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVAPD 0x20(%RSP),%XMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVAPD 0x10(%RSP),%XMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VEXTRACTF128 $0x1,%YMM0,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
VADDPD %XMM1,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 |
VPERMILPD $0x1,%XMM0,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 |
VADDSD %XMM1,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 |
CMP %RCX,%RDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JE 31160 <_ZNK11qmcplusplus14BsplineFunctorIdE9evaluateVEiiiPKdPd+0x6a0> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
VUNPCKLPD %XMM8,%XMM2,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 |
VUNPCKLPD %XMM5,%XMM3,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 |
VUNPCKLPD %XMM6,%XMM4,%XMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 |
VUNPCKLPD %XMM9,%XMM7,%XMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 |
NOPW (%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
LEA -0x28(%RBP),%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
POP %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VZEROUPPER | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
RET | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼_ZNK11qmcplusplus14BsplineFunctorIdE9evaluateVEiiiPKdPd– | 1.47 | 3 |
○Loop 449 - BsplineFunctor.h:236-241 - libqmcwfs.so | 1.4 | 2.62 |
○Loop 447 - BsplineFunctor.h:246-260 - libqmcwfs.so | 0.02 | 0.04 |
○Loop 448 - BsplineFunctor.h:246-260 - libqmcwfs.so | 0 | 0.01 |