Function: qmcplusplus::BsplineFunctor<double>::evaluateV(int, int, int, double const*, double*) cons ... | Module: libqmcwfs.so | Source: BsplineFunctor.h:228-262 [...] | Coverage: 1.56% |
---|
Function: qmcplusplus::BsplineFunctor<double>::evaluateV(int, int, int, double const*, double*) cons ... | Module: libqmcwfs.so | Source: BsplineFunctor.h:228-262 [...] | Coverage: 1.56% |
---|
/home/kcamus/qaas_runs/170-254-9426/intel/miniqmc/build/miniqmc/src/QMCWaveFunctions/Jastrow/BsplineFunctor.h: 228 - 262 |
-------------------------------------------------------------------------------- |
228: { |
[...] |
236: for (int jat = 0; jat < iLimit; jat++) |
237: { |
238: real_type r = distArray[jat]; |
239: // pick the distances smaller than the cutoff and avoid the reference atom |
240: if (r < cutoff_radius && iStart + jat != iat) |
241: distArrayCompressed[iCount++] = distArray[jat]; |
242: } |
243: |
244: real_type d = 0.0; |
245: //#pragma omp simd reduction(+:d) |
246: for (int jat = 0; jat < iCount; jat++) |
247: { |
248: real_type r = distArrayCompressed[jat]; |
249: r *= DeltaRInv; |
250: int i = (int)r; |
251: real_type t = r - real_type(i); |
252: real_type tp0 = t * t * t; |
253: real_type tp1 = t * t; |
254: real_type tp2 = t; |
255: |
256: real_type d1 = SplineCoefs[i + 0] * (A[0] * tp0 + A[1] * tp1 + A[2] * tp2 + A[3]); |
257: real_type d2 = SplineCoefs[i + 1] * (A[4] * tp0 + A[5] * tp1 + A[6] * tp2 + A[7]); |
258: real_type d3 = SplineCoefs[i + 2] * (A[8] * tp0 + A[9] * tp1 + A[10] * tp2 + A[11]); |
259: real_type d4 = SplineCoefs[i + 3] * (A[12] * tp0 + A[13] * tp1 + A[14] * tp2 + A[15]); |
260: d += (d1 + d2 + d3 + d4); |
261: } |
262: return d; |
/usr/lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/stl_vector.h: 1064 - 1064 |
-------------------------------------------------------------------------------- |
1064: return *(this->_M_impl._M_start + __n); |
0x307d0 PUSH %RBP |
0x307d1 MOV %RSP,%RBP |
0x307d4 PUSH %R15 |
0x307d6 PUSH %R14 |
0x307d8 PUSH %R13 |
0x307da PUSH %R12 |
0x307dc PUSH %RBX |
0x307dd AND $-0x20,%RSP |
0x307e1 SUB $0x140,%RSP |
0x307e8 SUB %EDX,%ECX |
0x307ea VXORPD %XMM0,%XMM0,%XMM0 |
0x307ee JLE 30e70 |
0x307f4 VMOVSD 0x8(%RDI),%XMM1 |
0x307f9 MOVSXD %EDX,%RDX |
0x307fc MOVSXD %ESI,%RSI |
0x307ff MOV %ECX,%ECX |
0x30801 LEA (%R8,%RDX,8),%RAX |
0x30805 SUB %RDX,%RSI |
0x30808 XOR %R8D,%R8D |
0x3080b XOR %EDX,%EDX |
0x3080d JMP 30818 |
0x3080f NOP |
(449) 0x30810 INC %R8 |
(449) 0x30813 CMP %R8,%RCX |
(449) 0x30816 JE 30836 |
(449) 0x30818 CMP %R8,%RSI |
(449) 0x3081b JE 30810 |
(449) 0x3081d VMOVSD (%RAX,%R8,8),%XMM2 |
(449) 0x30823 VUCOMISD %XMM1,%XMM2 |
(449) 0x30827 JAE 30810 |
(449) 0x30829 MOVSXD %EDX,%R10 |
(449) 0x3082c INC %EDX |
(449) 0x3082e VMOVSD %XMM2,(%R9,%R10,8) |
(449) 0x30834 JMP 30810 |
0x30836 TEST %EDX,%EDX |
0x30838 JLE 30e70 |
0x3083e VMOVSD 0x238(%RDI),%XMM12 |
0x30846 VMOVSD 0x18(%RDI),%XMM13 |
0x3084b VMOVUPD 0x20(%RDI),%XMM14 |
0x30850 VMOVSD 0x30(%RDI),%XMM15 |
0x30855 VMOVSD 0x38(%RDI),%XMM2 |
0x3085a VMOVSD 0x40(%RDI),%XMM3 |
0x3085f VMOVSD 0x48(%RDI),%XMM4 |
0x30864 VMOVSD 0x58(%RDI),%XMM5 |
0x30869 VMOVSD 0x60(%RDI),%XMM8 |
0x3086e VMOVSD 0x68(%RDI),%XMM6 |
0x30873 VMOVSD 0x50(%RDI),%XMM7 |
0x30878 VMOVSD 0x70(%RDI),%XMM9 |
0x3087d VMOVSD 0x78(%RDI),%XMM16 |
0x30884 VMOVSD 0x80(%RDI),%XMM30 |
0x3088b VMOVSD 0x88(%RDI),%XMM31 |
0x30892 VMOVSD 0x90(%RDI),%XMM17 |
0x30899 MOV 0x218(%RDI),%RAX |
0x308a0 MOV %EDX,%ECX |
0x308a2 CMP $0x8,%EDX |
0x308a5 JAE 308b2 |
0x308a7 VXORPD %XMM0,%XMM0,%XMM0 |
0x308ab XOR %EDX,%EDX |
0x308ad JMP 30da5 |
0x308b2 VMOVAPD %XMM12,0xf0(%RSP) |
0x308bb VBROADCASTSD %XMM12,%YMM0 |
0x308c0 VPBROADCASTD -0x27171(%RIP),%XMM11 |
0x308c9 VPBROADCASTD -0x2718a(%RIP),%XMM12 |
0x308d2 VMOVAPD %XMM17,%XMM1 |
0x308d8 MOV %RCX,%RSI |
0x308db SHR $0x3,%RSI |
0x308df VMOVAPD %XMM30,0xb0(%RSP) |
0x308e7 VMOVAPD %XMM31,0xa0(%RSP) |
0x308ef VBROADCASTSD %XMM13,%YMM17 |
0x308f5 VBROADCASTSD %XMM14,%YMM18 |
0x308fb VPERMPD $0x55,%YMM14,%YMM19 |
0x30902 VBROADCASTSD %XMM15,%YMM20 |
0x30908 VBROADCASTSD %XMM2,%YMM21 |
0x3090e VBROADCASTSD %XMM3,%YMM22 |
0x30914 VBROADCASTSD %XMM4,%YMM23 |
0x3091a VBROADCASTSD %XMM7,%YMM24 |
0x30920 VBROADCASTSD %XMM5,%YMM25 |
0x30926 VBROADCASTSD %XMM8,%YMM26 |
0x3092c VBROADCASTSD %XMM6,%YMM27 |
0x30932 VMOVAPD %XMM9,0x10(%RSP) |
0x30938 VBROADCASTSD %XMM9,%YMM28 |
0x3093e VBROADCASTSD %XMM16,%YMM29 |
0x30944 VBROADCASTSD %XMM30,%YMM30 |
0x3094a VBROADCASTSD %XMM31,%YMM31 |
0x30950 VBROADCASTSD %XMM1,%YMM9 |
0x30955 MOV %ECX,%EDX |
0x30957 VMOVAPD %XMM15,0xd0(%RSP) |
0x30960 VMOVAPD %XMM1,0x90(%RSP) |
0x30969 AND $-0x8,%EDX |
0x3096c VXORPD %XMM1,%XMM1,%XMM1 |
0x30970 VXORPD %XMM15,%XMM15,%XMM15 |
0x30975 VMOVAPD %XMM13,0xe0(%RSP) |
0x3097e VMOVAPD %YMM14,0x100(%RSP) |
0x30987 VMOVAPD %XMM2,0x80(%RSP) |
0x30990 VMOVAPD %XMM3,0x70(%RSP) |
0x30996 VMOVAPD %XMM4,0x60(%RSP) |
0x3099c VMOVAPD %XMM7,0x30(%RSP) |
0x309a2 VMOVAPD %XMM5,0x50(%RSP) |
0x309a8 VMOVAPD %XMM8,0x20(%RSP) |
0x309ae VMOVAPD %XMM6,0x40(%RSP) |
0x309b4 VMOVAPD %XMM16,0xc0(%RSP) |
0x309bc XOR %EDI,%EDI |
0x309be SAL $0x6,%RSI |
0x309c2 NOPW %CS:(%RAX,%RAX,1) |
(447) 0x309d0 VMULPD (%R9,%RDI,1),%YMM0,%YMM2 |
(447) 0x309d6 VMULPD 0x20(%R9,%RDI,1),%YMM0,%YMM5 |
(447) 0x309dd ADD $0x40,%RDI |
(447) 0x309e1 VCVTTPD2DQ %YMM2,%XMM10 |
(447) 0x309e5 VCVTTPD2DQ %YMM5,%XMM4 |
(447) 0x309e9 VROUNDPD $0xb,%YMM2,%YMM6 |
(447) 0x309ef VROUNDPD $0xb,%YMM5,%YMM13 |
(447) 0x309f5 VPMOVSXDQ %XMM10,%YMM7 |
(447) 0x309fa VPMOVSXDQ %XMM4,%YMM8 |
(447) 0x309ff VSUBPD %YMM6,%YMM2,%YMM16 |
(447) 0x30a05 VSUBPD %YMM13,%YMM5,%YMM13 |
(447) 0x30a0a VPCMPEQD %XMM6,%XMM6,%XMM6 |
(447) 0x30a0e VMOVQ %XMM7,%R10 |
(447) 0x30a13 VPEXTRQ $0x1,%XMM7,%R11 |
(447) 0x30a19 VEXTRACTI128 $0x1,%YMM7,%XMM7 |
(447) 0x30a1f VMOVQ %XMM8,%R8 |
(447) 0x30a24 VPEXTRQ $0x1,%XMM8,%R15 |
(447) 0x30a2a VPEXTRQ $0x1,%XMM7,%RBX |
(447) 0x30a30 VMOVQ %XMM7,%R14 |
(447) 0x30a35 VEXTRACTI128 $0x1,%YMM8,%XMM7 |
(447) 0x30a3b VMOVSD (%RAX,%R10,8),%XMM8 |
(447) 0x30a41 VMULPD %YMM16,%YMM16,%YMM2 |
(447) 0x30a47 VMULPD %YMM13,%YMM13,%YMM14 |
(447) 0x30a4c VMOVQ %XMM7,%R12 |
(447) 0x30a51 VPEXTRQ $0x1,%XMM7,%R13 |
(447) 0x30a57 VMOVSD (%RAX,%R14,8),%XMM7 |
(447) 0x30a5d VMOVHPD (%RAX,%R11,8),%XMM8,%XMM8 |
(447) 0x30a63 VMOVHPD (%RAX,%RBX,8),%XMM7,%XMM7 |
(447) 0x30a68 VMULPD %YMM16,%YMM2,%YMM5 |
(447) 0x30a6e VMULPD %YMM13,%YMM14,%YMM3 |
(447) 0x30a73 VINSERTF128 $0x1,%XMM7,%YMM8,%YMM7 |
(447) 0x30a79 VMOVAPD %YMM18,%YMM8 |
(447) 0x30a7f VFMADD213PD %YMM20,%YMM2,%YMM8 |
(447) 0x30a85 VFMADD231PD %YMM16,%YMM19,%YMM8 |
(447) 0x30a8b VFMADD231PD %YMM17,%YMM5,%YMM8 |
(447) 0x30a91 VFMADD213PD %YMM1,%YMM7,%YMM8 |
(447) 0x30a96 VMOVSD (%RAX,%R12,8),%XMM1 |
(447) 0x30a9c VMOVSD (%RAX,%R8,8),%XMM7 |
(447) 0x30aa2 VMOVHPD (%RAX,%R13,8),%XMM1,%XMM1 |
(447) 0x30aa8 VMOVHPD (%RAX,%R15,8),%XMM7,%XMM7 |
(447) 0x30aae VINSERTF128 $0x1,%XMM1,%YMM7,%YMM1 |
(447) 0x30ab4 VMOVAPD %YMM18,%YMM7 |
(447) 0x30aba VFMADD213PD %YMM20,%YMM14,%YMM7 |
(447) 0x30ac0 VFMADD231PD %YMM13,%YMM19,%YMM7 |
(447) 0x30ac6 VFMADD231PD %YMM17,%YMM3,%YMM7 |
(447) 0x30acc VFMADD213PD %YMM15,%YMM1,%YMM7 |
(447) 0x30ad1 VPSUBD %XMM6,%XMM10,%XMM1 |
(447) 0x30ad5 VPSUBD %XMM6,%XMM4,%XMM15 |
(447) 0x30ad9 VMOVAPD %YMM3,%YMM6 |
(447) 0x30add VFMADD213PD %YMM28,%YMM25,%YMM6 |
(447) 0x30ae3 VPMOVSXDQ %XMM1,%YMM1 |
(447) 0x30ae8 VPMOVSXDQ %XMM15,%YMM15 |
(447) 0x30aed VMOVQ %XMM1,%R10 |
(447) 0x30af2 VPEXTRQ $0x1,%XMM1,%R11 |
(447) 0x30af8 VEXTRACTI128 $0x1,%YMM1,%XMM1 |
(447) 0x30afe VMOVQ %XMM15,%R8 |
(447) 0x30b03 VPEXTRQ $0x1,%XMM15,%R15 |
(447) 0x30b09 VMOVQ %XMM1,%RBX |
(447) 0x30b0e VPEXTRQ $0x1,%XMM1,%R14 |
(447) 0x30b14 VEXTRACTI128 $0x1,%YMM15,%XMM1 |
(447) 0x30b1a VMOVSD (%RAX,%R10,8),%XMM15 |
(447) 0x30b20 VFMADD231PD %YMM14,%YMM26,%YMM6 |
(447) 0x30b26 VMOVQ %XMM1,%R12 |
(447) 0x30b2b VPEXTRQ $0x1,%XMM1,%R13 |
(447) 0x30b31 VMOVSD (%RAX,%RBX,8),%XMM1 |
(447) 0x30b36 VMOVHPD (%RAX,%R11,8),%XMM15,%XMM15 |
(447) 0x30b3c VMOVHPD (%RAX,%R14,8),%XMM1,%XMM1 |
(447) 0x30b42 VFMADD231PD %YMM13,%YMM27,%YMM6 |
(447) 0x30b48 VINSERTF128 $0x1,%XMM1,%YMM15,%YMM1 |
(447) 0x30b4e VMOVAPD %YMM5,%YMM15 |
(447) 0x30b52 VFMADD213PD %YMM24,%YMM21,%YMM15 |
(447) 0x30b58 VFMADD231PD %YMM2,%YMM22,%YMM15 |
(447) 0x30b5e VFMADD231PD %YMM16,%YMM23,%YMM15 |
(447) 0x30b64 VFMADD213PD %YMM8,%YMM1,%YMM15 |
(447) 0x30b69 VMOVSD (%RAX,%R12,8),%XMM1 |
(447) 0x30b6f VMOVSD (%RAX,%R8,8),%XMM8 |
(447) 0x30b75 VMOVHPD (%RAX,%R13,8),%XMM1,%XMM1 |
(447) 0x30b7b VMOVHPD (%RAX,%R15,8),%XMM8,%XMM8 |
(447) 0x30b81 VINSERTF128 $0x1,%XMM1,%YMM8,%YMM1 |
(447) 0x30b87 VMOVAPD %YMM3,%YMM8 |
(447) 0x30b8b VFMADD213PD %YMM24,%YMM21,%YMM8 |
(447) 0x30b91 VFMADD213PD %YMM9,%YMM29,%YMM3 |
(447) 0x30b97 VFMADD231PD %YMM14,%YMM22,%YMM8 |
(447) 0x30b9d VFMADD231PD %YMM14,%YMM30,%YMM3 |
(447) 0x30ba3 VFMADD231PD %YMM13,%YMM23,%YMM8 |
(447) 0x30ba9 VFMADD231PD %YMM13,%YMM31,%YMM3 |
(447) 0x30baf VFMADD213PD %YMM7,%YMM1,%YMM8 |
(447) 0x30bb4 VPADDD %XMM11,%XMM10,%XMM1 |
(447) 0x30bb9 VPADDD %XMM4,%XMM11,%XMM7 |
(447) 0x30bbd VPADDD %XMM4,%XMM12,%XMM4 |
(447) 0x30bc1 VPMOVSXDQ %XMM1,%YMM1 |
(447) 0x30bc6 VPMOVSXDQ %XMM7,%YMM7 |
(447) 0x30bcb VPMOVSXDQ %XMM4,%YMM4 |
(447) 0x30bd0 VPEXTRQ $0x1,%XMM1,%R10 |
(447) 0x30bd6 VMOVQ %XMM1,%R11 |
(447) 0x30bdb VEXTRACTI128 $0x1,%YMM1,%XMM1 |
(447) 0x30be1 VMOVQ %XMM7,%R8 |
(447) 0x30be6 VPEXTRQ $0x1,%XMM7,%R15 |
(447) 0x30bec VMOVQ %XMM1,%RBX |
(447) 0x30bf1 VPEXTRQ $0x1,%XMM1,%R14 |
(447) 0x30bf7 VEXTRACTI128 $0x1,%YMM7,%XMM1 |
(447) 0x30bfd VMOVSD (%RAX,%R11,8),%XMM7 |
(447) 0x30c03 VMOVQ %XMM1,%R12 |
(447) 0x30c08 VPEXTRQ $0x1,%XMM1,%R13 |
(447) 0x30c0e VMOVSD (%RAX,%RBX,8),%XMM1 |
(447) 0x30c13 VMOVHPD (%RAX,%R10,8),%XMM7,%XMM7 |
(447) 0x30c19 VPEXTRQ $0x1,%XMM4,%R10 |
(447) 0x30c1f VMOVHPD (%RAX,%R14,8),%XMM1,%XMM1 |
(447) 0x30c25 VINSERTF128 $0x1,%XMM1,%YMM7,%YMM1 |
(447) 0x30c2b VMOVAPD %YMM5,%YMM7 |
(447) 0x30c2f VFMADD213PD %YMM28,%YMM25,%YMM7 |
(447) 0x30c35 VFMADD213PD %YMM9,%YMM29,%YMM5 |
(447) 0x30c3b VFMADD231PD %YMM2,%YMM26,%YMM7 |
(447) 0x30c41 VFMADD231PD %YMM2,%YMM30,%YMM5 |
(447) 0x30c47 VFMADD231PD %YMM16,%YMM27,%YMM7 |
(447) 0x30c4d VFMADD231PD %YMM16,%YMM31,%YMM5 |
(447) 0x30c53 VFMADD213PD %YMM15,%YMM1,%YMM7 |
(447) 0x30c58 VMOVSD (%RAX,%R8,8),%XMM15 |
(447) 0x30c5e VMOVSD (%RAX,%R12,8),%XMM1 |
(447) 0x30c64 VMOVQ %XMM4,%R8 |
(447) 0x30c69 VMOVHPD (%RAX,%R13,8),%XMM1,%XMM1 |
(447) 0x30c6f VMOVHPD (%RAX,%R15,8),%XMM15,%XMM15 |
(447) 0x30c75 VINSERTF128 $0x1,%XMM1,%YMM15,%YMM1 |
(447) 0x30c7b VMOVAPD %YMM3,%YMM15 |
(447) 0x30c7f VFMADD213PD %YMM8,%YMM1,%YMM6 |
(447) 0x30c84 VPADDD %XMM12,%XMM10,%XMM1 |
(447) 0x30c89 VPMOVSXDQ %XMM1,%YMM1 |
(447) 0x30c8e VMOVQ %XMM1,%R11 |
(447) 0x30c93 VPEXTRQ $0x1,%XMM1,%RBX |
(447) 0x30c99 VEXTRACTI128 $0x1,%YMM1,%XMM1 |
(447) 0x30c9f VPEXTRQ $0x1,%XMM1,%R14 |
(447) 0x30ca5 VMOVQ %XMM1,%R15 |
(447) 0x30caa VEXTRACTI128 $0x1,%YMM4,%XMM1 |
(447) 0x30cb0 VMOVSD (%RAX,%R11,8),%XMM4 |
(447) 0x30cb6 VMOVQ %XMM1,%R12 |
(447) 0x30cbb VPEXTRQ $0x1,%XMM1,%R13 |
(447) 0x30cc1 VMOVSD (%RAX,%R15,8),%XMM1 |
(447) 0x30cc7 VMOVHPD (%RAX,%RBX,8),%XMM4,%XMM4 |
(447) 0x30ccc VMOVHPD (%RAX,%R14,8),%XMM1,%XMM1 |
(447) 0x30cd2 VMOVSD (%RAX,%R12,8),%XMM2 |
(447) 0x30cd8 VMOVHPD (%RAX,%R13,8),%XMM2,%XMM2 |
(447) 0x30cde VINSERTF128 $0x1,%XMM1,%YMM4,%YMM4 |
(447) 0x30ce4 VMOVAPD %YMM5,%YMM1 |
(447) 0x30ce8 VFMADD213PD %YMM7,%YMM4,%YMM1 |
(447) 0x30ced VMOVSD (%RAX,%R8,8),%XMM4 |
(447) 0x30cf3 VMOVHPD (%RAX,%R10,8),%XMM4,%XMM4 |
(447) 0x30cf9 VINSERTF128 $0x1,%XMM2,%YMM4,%YMM2 |
(447) 0x30cff VFMADD213PD %YMM6,%YMM2,%YMM15 |
(447) 0x30d04 CMP %RDI,%RSI |
(447) 0x30d07 JNE 309d0 |
0x30d0d VADDPD %YMM1,%YMM15,%YMM0 |
0x30d11 VMOVAPD 0xf0(%RSP),%XMM12 |
0x30d1a VMOVAPD 0xe0(%RSP),%XMM13 |
0x30d23 VMOVAPD 0x100(%RSP),%YMM14 |
0x30d2c VMOVAPD 0xd0(%RSP),%XMM15 |
0x30d35 VMOVAPD 0xc0(%RSP),%XMM16 |
0x30d3d VMOVAPD 0xb0(%RSP),%XMM30 |
0x30d45 VMOVAPD 0xa0(%RSP),%XMM31 |
0x30d4d VMOVAPD 0x90(%RSP),%XMM17 |
0x30d55 VMOVAPD 0x80(%RSP),%XMM2 |
0x30d5e VMOVAPD 0x70(%RSP),%XMM3 |
0x30d64 VMOVAPD 0x60(%RSP),%XMM4 |
0x30d6a VMOVAPD 0x50(%RSP),%XMM5 |
0x30d70 VMOVAPD 0x40(%RSP),%XMM6 |
0x30d76 VMOVAPD 0x30(%RSP),%XMM7 |
0x30d7c VMOVAPD 0x20(%RSP),%XMM8 |
0x30d82 VMOVAPD 0x10(%RSP),%XMM9 |
0x30d88 VEXTRACTF128 $0x1,%YMM0,%XMM1 |
0x30d8e VADDPD %XMM1,%XMM0,%XMM0 |
0x30d92 VPERMILPD $0x1,%XMM0,%XMM1 |
0x30d98 VADDSD %XMM1,%XMM0,%XMM0 |
0x30d9c CMP %RCX,%RDX |
0x30d9f JE 30e70 |
0x30da5 VUNPCKLPD %XMM8,%XMM2,%XMM1 |
0x30daa VUNPCKLPD %XMM5,%XMM3,%XMM2 |
0x30dae VUNPCKLPD %XMM6,%XMM4,%XMM4 |
0x30db2 VUNPCKLPD %XMM9,%XMM7,%XMM5 |
0x30db7 NOPW (%RAX,%RAX,1) |
(448) 0x30dc0 VMULSD (%R9,%RDX,8),%XMM12,%XMM3 |
(448) 0x30dc6 INC %RDX |
(448) 0x30dc9 VROUNDSD $0xb,%XMM3,%XMM3,%XMM6 |
(448) 0x30dcf VCVTTSD2SI %XMM3,%ESI |
(448) 0x30dd3 MOVSXD %ESI,%RSI |
(448) 0x30dd6 VSUBSD %XMM6,%XMM3,%XMM3 |
(448) 0x30dda VMULSD %XMM3,%XMM3,%XMM8 |
(448) 0x30dde VMOVDDUP %XMM3,%XMM6 |
(448) 0x30de2 VMULSD %XMM3,%XMM31,%XMM7 |
(448) 0x30de8 VUNPCKLPD %XMM6,%XMM8,%XMM9 |
(448) 0x30dec VMULSD %XMM3,%XMM8,%XMM3 |
(448) 0x30df0 VMULPD %XMM9,%XMM14,%XMM9 |
(448) 0x30df5 VUNPCKLPD %XMM8,%XMM3,%XMM11 |
(448) 0x30dfa VPERMILPD $0x1,%XMM9,%XMM10 |
(448) 0x30e00 VADDSD %XMM9,%XMM10,%XMM9 |
(448) 0x30e05 VUNPCKLPD %XMM3,%XMM8,%XMM10 |
(448) 0x30e09 VFMADD213PD %XMM5,%XMM2,%XMM10 |
(448) 0x30e0e VADDSD %XMM15,%XMM9,%XMM9 |
(448) 0x30e13 VFMADD213PD %XMM10,%XMM1,%XMM11 |
(448) 0x30e18 VFMADD231SD %XMM13,%XMM3,%XMM9 |
(448) 0x30e1d VMULSD %XMM3,%XMM16,%XMM3 |
(448) 0x30e23 VUNPCKLPD %XMM3,%XMM0,%XMM0 |
(448) 0x30e27 VFMADD231PD %XMM6,%XMM4,%XMM11 |
(448) 0x30e2c VMULPD 0x8(%RAX,%RSI,8),%XMM11,%XMM6 |
(448) 0x30e32 VUNPCKLPD %XMM8,%XMM9,%XMM3 |
(448) 0x30e37 VMOVSD (%RAX,%RSI,8),%XMM8 |
(448) 0x30e3c VUNPCKLPD %XMM30,%XMM8,%XMM8 |
(448) 0x30e42 VFMADD213PD %XMM0,%XMM3,%XMM8 |
(448) 0x30e47 VUNPCKLPD %XMM7,%XMM6,%XMM0 |
(448) 0x30e4b VSHUFPD $0x1,%XMM17,%XMM6,%XMM3 |
(448) 0x30e52 VADDPD %XMM0,%XMM8,%XMM0 |
(448) 0x30e56 VADDPD %XMM3,%XMM0,%XMM0 |
(448) 0x30e5a VPERMILPD $0x1,%XMM0,%XMM3 |
(448) 0x30e60 VFMADD231SD 0x18(%RAX,%RSI,8),%XMM3,%XMM0 |
(448) 0x30e67 CMP %RDX,%RCX |
(448) 0x30e6a JNE 30dc0 |
0x30e70 LEA -0x28(%RBP),%RSP |
0x30e74 POP %RBX |
0x30e75 POP %R12 |
0x30e77 POP %R13 |
0x30e79 POP %R14 |
0x30e7b POP %R15 |
0x30e7d POP %RBP |
0x30e7e VZEROUPPER |
0x30e81 RET |
0x30e82 INT $0x3 |
0x30e83 INT $0x3 |
0x30e84 INT $0x3 |
0x30e85 INT $0x3 |
0x30e86 INT $0x3 |
0x30e87 INT $0x3 |
0x30e88 INT $0x3 |
0x30e89 INT $0x3 |
0x30e8a INT $0x3 |
0x30e8b INT $0x3 |
0x30e8c INT $0x3 |
0x30e8d INT $0x3 |
0x30e8e INT $0x3 |
0x30e8f INT $0x3 |
Path / |
Source file and lines | BsplineFunctor.h:228-262 |
Module | libqmcwfs.so |
nb instructions | 143 |
nb uops | 126 |
loop length | 685 |
used x86 registers | 13 |
used mmx registers | 0 |
used xmm registers | 19 |
used ymm registers | 20 |
used zmm registers | 0 |
nb stack references | 17 |
micro-operation queue | 21.00 cycles |
front end | 21.00 cycles |
ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 4.50 | 4.50 | 4.25 | 4.25 | 3.50 | 17.33 | 17.33 | 17.33 | 1.00 | 8.50 | 8.50 | 8.00 | 8.00 | 8.00 |
cycles | 4.50 | 4.50 | 4.25 | 4.25 | 3.50 | 17.33 | 17.33 | 17.33 | 1.00 | 8.50 | 8.50 | 8.00 | 8.00 | 8.00 |
Cycles executing div or sqrt instructions | NA |
Front-end | 21.00 |
Dispatch | 17.33 |
Overall L1 | 21.00 |
all | 10% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 11% |
all | 53% |
load | 51% |
store | 100% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 66% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 28% |
all | 48% |
load | 48% |
store | 100% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 50% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 24% |
all | 11% |
load | 6% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 11% |
all | 20% |
load | 19% |
store | 26% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 29% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 16% |
all | 19% |
load | 18% |
store | 26% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 25% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 15% |
Instruction | Nb FU | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
AND $-0x20,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SUB $0x140,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SUB %EDX,%ECX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
VXORPD %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JLE 30e70 <_ZNK11qmcplusplus14BsplineFunctorIdE9evaluateVEiiiPKdPd+0x6a0> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
VMOVSD 0x8(%RDI),%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOVSXD %EDX,%RDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOVSXD %ESI,%RSI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %ECX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA (%R8,%RDX,8),%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SUB %RDX,%RSI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
XOR %R8D,%R8D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
JMP 30818 <_ZNK11qmcplusplus14BsplineFunctorIdE9evaluateVEiiiPKdPd+0x48> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
NOP | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
TEST %EDX,%EDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JLE 30e70 <_ZNK11qmcplusplus14BsplineFunctorIdE9evaluateVEiiiPKdPd+0x6a0> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
VMOVSD 0x238(%RDI),%XMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVSD 0x18(%RDI),%XMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVUPD 0x20(%RDI),%XMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVSD 0x30(%RDI),%XMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVSD 0x38(%RDI),%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVSD 0x40(%RDI),%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVSD 0x48(%RDI),%XMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVSD 0x58(%RDI),%XMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVSD 0x60(%RDI),%XMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVSD 0x68(%RDI),%XMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVSD 0x50(%RDI),%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVSD 0x70(%RDI),%XMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVSD 0x78(%RDI),%XMM16 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVSD 0x80(%RDI),%XMM30 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVSD 0x88(%RDI),%XMM31 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVSD 0x90(%RDI),%XMM17 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV 0x218(%RDI),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %EDX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP $0x8,%EDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JAE 308b2 <_ZNK11qmcplusplus14BsplineFunctorIdE9evaluateVEiiiPKdPd+0xe2> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
VXORPD %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
JMP 30da5 <_ZNK11qmcplusplus14BsplineFunctorIdE9evaluateVEiiiPKdPd+0x5d5> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
VMOVAPD %XMM12,0xf0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 |
VBROADCASTSD %XMM12,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 |
VPBROADCASTD -0x27171(%RIP),%XMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VPBROADCASTD -0x2718a(%RIP),%XMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVAPD %XMM17,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RCX,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SHR $0x3,%RSI | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVAPD %XMM30,0xb0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 |
VMOVAPD %XMM31,0xa0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 |
VBROADCASTSD %XMM13,%YMM17 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 |
VBROADCASTSD %XMM14,%YMM18 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 |
VPERMPD $0x55,%YMM14,%YMM19 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 4 | 0.50 |
VBROADCASTSD %XMM15,%YMM20 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 |
VBROADCASTSD %XMM2,%YMM21 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 |
VBROADCASTSD %XMM3,%YMM22 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 |
VBROADCASTSD %XMM4,%YMM23 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 |
VBROADCASTSD %XMM7,%YMM24 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 |
VBROADCASTSD %XMM5,%YMM25 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 |
VBROADCASTSD %XMM8,%YMM26 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 |
VBROADCASTSD %XMM6,%YMM27 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 |
VMOVAPD %XMM9,0x10(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 |
VBROADCASTSD %XMM9,%YMM28 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 |
VBROADCASTSD %XMM16,%YMM29 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 |
VBROADCASTSD %XMM30,%YMM30 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 |
VBROADCASTSD %XMM31,%YMM31 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 |
VBROADCASTSD %XMM1,%YMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 |
MOV %ECX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVAPD %XMM15,0xd0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 |
VMOVAPD %XMM1,0x90(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 |
AND $-0x8,%EDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
VXORPD %XMM1,%XMM1,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VXORPD %XMM15,%XMM15,%XMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVAPD %XMM13,0xe0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 |
VMOVAPD %YMM14,0x100(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 |
VMOVAPD %XMM2,0x80(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 |
VMOVAPD %XMM3,0x70(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 |
VMOVAPD %XMM4,0x60(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 |
VMOVAPD %XMM7,0x30(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 |
VMOVAPD %XMM5,0x50(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 |
VMOVAPD %XMM8,0x20(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 |
VMOVAPD %XMM6,0x40(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 |
VMOVAPD %XMM16,0xc0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 |
XOR %EDI,%EDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
SAL $0x6,%RSI | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
VADDPD %YMM1,%YMM15,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 |
VMOVAPD 0xf0(%RSP),%XMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVAPD 0xe0(%RSP),%XMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVAPD 0x100(%RSP),%YMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVAPD 0xd0(%RSP),%XMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVAPD 0xc0(%RSP),%XMM16 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVAPD 0xb0(%RSP),%XMM30 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVAPD 0xa0(%RSP),%XMM31 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVAPD 0x90(%RSP),%XMM17 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVAPD 0x80(%RSP),%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVAPD 0x70(%RSP),%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVAPD 0x60(%RSP),%XMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVAPD 0x50(%RSP),%XMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVAPD 0x40(%RSP),%XMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVAPD 0x30(%RSP),%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVAPD 0x20(%RSP),%XMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVAPD 0x10(%RSP),%XMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VEXTRACTF128 $0x1,%YMM0,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
VADDPD %XMM1,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 |
VPERMILPD $0x1,%XMM0,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 |
VADDSD %XMM1,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 |
CMP %RCX,%RDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JE 30e70 <_ZNK11qmcplusplus14BsplineFunctorIdE9evaluateVEiiiPKdPd+0x6a0> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
VUNPCKLPD %XMM8,%XMM2,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 |
VUNPCKLPD %XMM5,%XMM3,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 |
VUNPCKLPD %XMM6,%XMM4,%XMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 |
VUNPCKLPD %XMM9,%XMM7,%XMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 |
NOPW (%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
LEA -0x28(%RBP),%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
POP %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VZEROUPPER | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
RET | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 |
Source file and lines | BsplineFunctor.h:228-262 |
Module | libqmcwfs.so |
nb instructions | 143 |
nb uops | 126 |
loop length | 685 |
used x86 registers | 13 |
used mmx registers | 0 |
used xmm registers | 19 |
used ymm registers | 20 |
used zmm registers | 0 |
nb stack references | 17 |
micro-operation queue | 21.00 cycles |
front end | 21.00 cycles |
ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 4.50 | 4.50 | 4.25 | 4.25 | 3.50 | 17.33 | 17.33 | 17.33 | 1.00 | 8.50 | 8.50 | 8.00 | 8.00 | 8.00 |
cycles | 4.50 | 4.50 | 4.25 | 4.25 | 3.50 | 17.33 | 17.33 | 17.33 | 1.00 | 8.50 | 8.50 | 8.00 | 8.00 | 8.00 |
Cycles executing div or sqrt instructions | NA |
Front-end | 21.00 |
Dispatch | 17.33 |
Overall L1 | 21.00 |
all | 10% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 11% |
all | 53% |
load | 51% |
store | 100% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 66% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 28% |
all | 48% |
load | 48% |
store | 100% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 50% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 24% |
all | 11% |
load | 6% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 11% |
all | 20% |
load | 19% |
store | 26% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 29% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 16% |
all | 19% |
load | 18% |
store | 26% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 25% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 15% |
Instruction | Nb FU | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
AND $-0x20,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SUB $0x140,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SUB %EDX,%ECX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
VXORPD %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JLE 30e70 <_ZNK11qmcplusplus14BsplineFunctorIdE9evaluateVEiiiPKdPd+0x6a0> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
VMOVSD 0x8(%RDI),%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOVSXD %EDX,%RDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOVSXD %ESI,%RSI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %ECX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA (%R8,%RDX,8),%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SUB %RDX,%RSI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
XOR %R8D,%R8D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
JMP 30818 <_ZNK11qmcplusplus14BsplineFunctorIdE9evaluateVEiiiPKdPd+0x48> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
NOP | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
TEST %EDX,%EDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JLE 30e70 <_ZNK11qmcplusplus14BsplineFunctorIdE9evaluateVEiiiPKdPd+0x6a0> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
VMOVSD 0x238(%RDI),%XMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVSD 0x18(%RDI),%XMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVUPD 0x20(%RDI),%XMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVSD 0x30(%RDI),%XMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVSD 0x38(%RDI),%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVSD 0x40(%RDI),%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVSD 0x48(%RDI),%XMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVSD 0x58(%RDI),%XMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVSD 0x60(%RDI),%XMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVSD 0x68(%RDI),%XMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVSD 0x50(%RDI),%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVSD 0x70(%RDI),%XMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVSD 0x78(%RDI),%XMM16 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVSD 0x80(%RDI),%XMM30 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVSD 0x88(%RDI),%XMM31 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVSD 0x90(%RDI),%XMM17 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV 0x218(%RDI),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %EDX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP $0x8,%EDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JAE 308b2 <_ZNK11qmcplusplus14BsplineFunctorIdE9evaluateVEiiiPKdPd+0xe2> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
VXORPD %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
JMP 30da5 <_ZNK11qmcplusplus14BsplineFunctorIdE9evaluateVEiiiPKdPd+0x5d5> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
VMOVAPD %XMM12,0xf0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 |
VBROADCASTSD %XMM12,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 |
VPBROADCASTD -0x27171(%RIP),%XMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VPBROADCASTD -0x2718a(%RIP),%XMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVAPD %XMM17,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RCX,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SHR $0x3,%RSI | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVAPD %XMM30,0xb0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 |
VMOVAPD %XMM31,0xa0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 |
VBROADCASTSD %XMM13,%YMM17 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 |
VBROADCASTSD %XMM14,%YMM18 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 |
VPERMPD $0x55,%YMM14,%YMM19 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 4 | 0.50 |
VBROADCASTSD %XMM15,%YMM20 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 |
VBROADCASTSD %XMM2,%YMM21 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 |
VBROADCASTSD %XMM3,%YMM22 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 |
VBROADCASTSD %XMM4,%YMM23 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 |
VBROADCASTSD %XMM7,%YMM24 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 |
VBROADCASTSD %XMM5,%YMM25 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 |
VBROADCASTSD %XMM8,%YMM26 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 |
VBROADCASTSD %XMM6,%YMM27 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 |
VMOVAPD %XMM9,0x10(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 |
VBROADCASTSD %XMM9,%YMM28 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 |
VBROADCASTSD %XMM16,%YMM29 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 |
VBROADCASTSD %XMM30,%YMM30 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 |
VBROADCASTSD %XMM31,%YMM31 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 |
VBROADCASTSD %XMM1,%YMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 |
MOV %ECX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVAPD %XMM15,0xd0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 |
VMOVAPD %XMM1,0x90(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 |
AND $-0x8,%EDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
VXORPD %XMM1,%XMM1,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VXORPD %XMM15,%XMM15,%XMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVAPD %XMM13,0xe0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 |
VMOVAPD %YMM14,0x100(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 |
VMOVAPD %XMM2,0x80(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 |
VMOVAPD %XMM3,0x70(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 |
VMOVAPD %XMM4,0x60(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 |
VMOVAPD %XMM7,0x30(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 |
VMOVAPD %XMM5,0x50(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 |
VMOVAPD %XMM8,0x20(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 |
VMOVAPD %XMM6,0x40(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 |
VMOVAPD %XMM16,0xc0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 |
XOR %EDI,%EDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
SAL $0x6,%RSI | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
VADDPD %YMM1,%YMM15,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 |
VMOVAPD 0xf0(%RSP),%XMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVAPD 0xe0(%RSP),%XMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVAPD 0x100(%RSP),%YMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVAPD 0xd0(%RSP),%XMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVAPD 0xc0(%RSP),%XMM16 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVAPD 0xb0(%RSP),%XMM30 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVAPD 0xa0(%RSP),%XMM31 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVAPD 0x90(%RSP),%XMM17 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVAPD 0x80(%RSP),%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVAPD 0x70(%RSP),%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVAPD 0x60(%RSP),%XMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVAPD 0x50(%RSP),%XMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVAPD 0x40(%RSP),%XMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVAPD 0x30(%RSP),%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVAPD 0x20(%RSP),%XMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVAPD 0x10(%RSP),%XMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VEXTRACTF128 $0x1,%YMM0,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
VADDPD %XMM1,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 |
VPERMILPD $0x1,%XMM0,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 |
VADDSD %XMM1,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 |
CMP %RCX,%RDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JE 30e70 <_ZNK11qmcplusplus14BsplineFunctorIdE9evaluateVEiiiPKdPd+0x6a0> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
VUNPCKLPD %XMM8,%XMM2,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 |
VUNPCKLPD %XMM5,%XMM3,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 |
VUNPCKLPD %XMM6,%XMM4,%XMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 |
VUNPCKLPD %XMM9,%XMM7,%XMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 |
NOPW (%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
LEA -0x28(%RBP),%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
POP %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VZEROUPPER | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
RET | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 | |||||||||||||||||
INT $0x3 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼qmcplusplus::BsplineFunctor | 1.56 | 2.93 |
○Loop 449 - BsplineFunctor.h:236-241 - libqmcwfs.so | 1.5 | 2.75 |
○Loop 447 - BsplineFunctor.h:246-260 - libqmcwfs.so | 0.02 | 0.04 |
○Loop 448 - BsplineFunctor.h:246-260 - libqmcwfs.so | 0.01 | 0.01 |