Function: qmcplusplus::BsplineFunctor<double>::evaluateV(int, int, int, double const*, double*) cons ... | Module: exec | Source: BsplineFunctor.h:228-262 [...] | Coverage: 1.56% |
---|
Function: qmcplusplus::BsplineFunctor<double>::evaluateV(int, int, int, double const*, double*) cons ... | Module: exec | Source: BsplineFunctor.h:228-262 [...] | Coverage: 1.56% |
---|
/home/kcamus/qaas_runs/169-390-4082/intel/miniqmc/build/miniqmc/src/QMCWaveFunctions/Jastrow/BsplineFunctor.h: 228 - 262 |
-------------------------------------------------------------------------------- |
228: { |
229: const real_type* restrict distArray = _distArray + iStart; |
230: |
231: ASSUME_ALIGNED(distArrayCompressed); |
232: int iCount = 0; |
233: const int iLimit = iEnd - iStart; |
234: |
235: #pragma vector always |
236: for (int jat = 0; jat < iLimit; jat++) |
237: { |
238: real_type r = distArray[jat]; |
239: // pick the distances smaller than the cutoff and avoid the reference atom |
240: if (r < cutoff_radius && iStart + jat != iat) |
241: distArrayCompressed[iCount++] = distArray[jat]; |
242: } |
243: |
244: real_type d = 0.0; |
245: //#pragma omp simd reduction(+:d) |
246: for (int jat = 0; jat < iCount; jat++) |
247: { |
248: real_type r = distArrayCompressed[jat]; |
249: r *= DeltaRInv; |
250: int i = (int)r; |
251: real_type t = r - real_type(i); |
252: real_type tp0 = t * t * t; |
253: real_type tp1 = t * t; |
254: real_type tp2 = t; |
255: |
256: real_type d1 = SplineCoefs[i + 0] * (A[0] * tp0 + A[1] * tp1 + A[2] * tp2 + A[3]); |
257: real_type d2 = SplineCoefs[i + 1] * (A[4] * tp0 + A[5] * tp1 + A[6] * tp2 + A[7]); |
258: real_type d3 = SplineCoefs[i + 2] * (A[8] * tp0 + A[9] * tp1 + A[10] * tp2 + A[11]); |
259: real_type d4 = SplineCoefs[i + 3] * (A[12] * tp0 + A[13] * tp1 + A[14] * tp2 + A[15]); |
260: d += (d1 + d2 + d3 + d4); |
261: } |
262: return d; |
/usr/lib64/gcc/x86_64-pc-linux-gnu/13.1.1/../../../../include/c++/13.1.1/bits/stl_vector.h: 1145 - 1145 |
-------------------------------------------------------------------------------- |
1145: return *(this->_M_impl._M_start + __n); |
0x41e9d0 PUSH %RBP |
0x41e9d1 MOV %RSP,%RBP |
0x41e9d4 PUSH %R15 |
0x41e9d6 PUSH %R14 |
0x41e9d8 PUSH %R13 |
0x41e9da PUSH %R12 |
0x41e9dc PUSH %RBX |
0x41e9dd SUB %EDX,%ECX |
0x41e9df VXORPD %XMM0,%XMM0,%XMM0 |
0x41e9e3 TEST %ECX,%ECX |
0x41e9e5 JLE 41ef6a |
0x41e9eb MOVSXD %EDX,%R10 |
0x41e9ee VMOVSD 0x8(%RDI),%XMM1 |
0x41e9f3 MOV %ECX,%R11D |
0x41e9f6 MOV $-0x10,%R12D |
0x41e9fc AND %R11,%R12 |
0x41e9ff JE 41ed8a |
0x41ea05 VPBROADCASTD %ESI,%YMM2 |
0x41ea0b VBROADCASTSD %XMM1,%YMM3 |
0x41ea10 VMOVQ %R9,%XMM4 |
0x41ea15 MOV %EDX,%R14D |
0x41ea18 LEA (%R8,%R10,8),%R15 |
0x41ea1c VPXOR %XMM5,%XMM5,%XMM5 |
0x41ea20 XOR %EAX,%EAX |
0x41ea22 VMOVDQU 0x79876(%RIP),%YMM8 |
0x41ea2a VMOVDQU64 0x7984c(%RIP),%YMM16 |
0x41ea34 VPXOR %XMM6,%XMM6,%XMM6 |
0x41ea38 VPXOR %XMM9,%XMM9,%XMM9 |
0x41ea3d NOPL (%RAX) |
(302) 0x41ea40 VMOVUPD (%R15,%RAX,8),%YMM10 |
(302) 0x41ea46 VMOVUPD 0x20(%R15,%RAX,8),%YMM11 |
(302) 0x41ea4d VMOVUPD 0x40(%R15,%RAX,8),%YMM12 |
(302) 0x41ea54 VMOVUPD 0x60(%R15,%RAX,8),%YMM13 |
(302) 0x41ea5b LEA (%R14,%RAX,1),%EBX |
(302) 0x41ea5f VPBROADCASTD %EBX,%YMM14 |
(302) 0x41ea65 VPADDD %YMM8,%YMM14,%YMM15 |
(302) 0x41ea6a VPADDD %YMM16,%YMM14,%YMM14 |
(302) 0x41ea70 VPCMPNEQD %YMM2,%YMM14,%K2 |
(302) 0x41ea77 VPCMPNEQD %YMM2,%YMM15,%K0 |
(302) 0x41ea7e VCMPPD $0x1,%YMM3,%YMM12,%K1 |
(302) 0x41ea85 VCMPPD $0x1,%YMM3,%YMM10,%K5 |
(302) 0x41ea8c VCMPPD $0x1,%YMM3,%YMM11,%K3 |
(302) 0x41ea93 KSHIFTLB $0x4,%K3,%K4 |
(302) 0x41ea99 KORB %K4,%K5,%K4 |
(302) 0x41ea9d VPMOVSXDQ %XMM9,%XMM7 |
(302) 0x41eaa2 VPSLLQ $0x3,%XMM7,%XMM7 |
(302) 0x41eaa7 VPADDQ %XMM7,%XMM4,%XMM7 |
(302) 0x41eaab VMOVQ %XMM7,%RBX |
(302) 0x41eab0 KANDW %K0,%K1,%K7 |
(302) 0x41eab4 KANDB %K2,%K4,%K4 |
(302) 0x41eab8 KMOVB %K4,%ECX |
(302) 0x41eabc POPCNT %ECX,%ECX |
(302) 0x41eac0 VCOMPRESSPD %YMM12,(%RBX,%RCX,8){%K7} |
(302) 0x41eac7 VCMPPD $0x1,%YMM3,%YMM13,%K6 |
(302) 0x41eace KMOVB %K7,-0x2a(%RBP) |
(302) 0x41ead3 LEA (%RBX,%RCX,8),%R13 |
(302) 0x41ead7 KSHIFTRB $0x4,%K0,%K7 |
(302) 0x41eadd KANDW %K7,%K6,%K7 |
(302) 0x41eae1 MOVZX -0x2a(%RBP),%ECX |
(302) 0x41eae5 POPCNT %ECX,%ECX |
(302) 0x41eae9 VCOMPRESSPD %YMM13,(%R13,%RCX,8){%K7} |
(302) 0x41eaf1 KANDW %K2,%K5,%K5 |
(302) 0x41eaf5 KSHIFTRB $0x4,%K2,%K2 |
(302) 0x41eafb KANDW %K2,%K3,%K2 |
(302) 0x41eaff KMOVB %K5,-0x29(%RBP) |
(302) 0x41eb04 MOVZX -0x29(%RBP),%ECX |
(302) 0x41eb08 POPCNT %ECX,%ECX |
(302) 0x41eb0c VCOMPRESSPD %YMM11,(%RBX,%RCX,8){%K2} |
(302) 0x41eb13 VCOMPRESSPD %YMM10,(%RBX){%K5} |
(302) 0x41eb19 KSHIFTLB $0x4,%K6,%K2 |
(302) 0x41eb1f KORB %K2,%K1,%K1 |
(302) 0x41eb23 VPMOVM2D %K4,%YMM7 |
(302) 0x41eb29 KANDB %K0,%K1,%K0 |
(302) 0x41eb2d VPMOVM2D %K0,%YMM10 |
(302) 0x41eb33 VPSUBD %YMM7,%YMM9,%YMM7 |
(302) 0x41eb37 VPSUBD %YMM10,%YMM5,%YMM9 |
(302) 0x41eb3c VPADDD %YMM7,%YMM9,%YMM7 |
(302) 0x41eb40 VEXTRACTI128 $0x1,%YMM7,%XMM9 |
(302) 0x41eb46 VPADDD %YMM7,%YMM9,%YMM7 |
(302) 0x41eb4a VPSHUFD $-0x12,%XMM7,%XMM9 |
(302) 0x41eb4f VPADDD %YMM7,%YMM9,%YMM7 |
(302) 0x41eb53 VPSHUFD $0x55,%XMM7,%XMM9 |
(302) 0x41eb58 VPADDD %YMM7,%YMM9,%YMM10 |
(302) 0x41eb5c VPBLENDW $0x3,%XMM10,%XMM6,%XMM9 |
(302) 0x41eb62 ADD $0x10,%RAX |
(302) 0x41eb66 CMP %R12,%RAX |
(302) 0x41eb69 JB 41ea40 |
0x41eb6f VMOVD %XMM10,%EAX |
0x41eb73 CMP %R11,%R12 |
0x41eb76 JNE 41ed8f |
0x41eb7c TEST %EAX,%EAX |
0x41eb7e JLE 41ef6a |
0x41eb84 VMOVSD 0x238(%RDI),%XMM2 |
0x41eb8c VMOVSD 0x18(%RDI),%XMM7 |
0x41eb91 VMOVSD 0x20(%RDI),%XMM1 |
0x41eb96 VMOVSD 0x28(%RDI),%XMM5 |
0x41eb9b VMOVSD 0x30(%RDI),%XMM4 |
0x41eba0 VMOVSD 0x38(%RDI),%XMM6 |
0x41eba5 VMOVSD 0x40(%RDI),%XMM3 |
0x41ebaa VMOVSD 0x48(%RDI),%XMM0 |
0x41ebaf VMOVSD 0x50(%RDI),%XMM14 |
0x41ebb4 VMOVSD 0x58(%RDI),%XMM22 |
0x41ebbb VMOVSD 0x60(%RDI),%XMM21 |
0x41ebc2 VMOVSD 0x68(%RDI),%XMM9 |
0x41ebc7 VMOVSD 0x70(%RDI),%XMM8 |
0x41ebcc VMOVSD 0x78(%RDI),%XMM12 |
0x41ebd1 VMOVSD 0x80(%RDI),%XMM20 |
0x41ebd8 VMOVSD 0x88(%RDI),%XMM15 |
0x41ebe0 MOV 0x218(%RDI),%RDX |
0x41ebe7 VMOVSD 0x90(%RDI),%XMM16 |
0x41ebee MOV %EAX,%ECX |
0x41ebf0 MOV $-0x4,%EAX |
0x41ebf5 AND %RCX,%RAX |
0x41ebf8 JE 41edc9 |
0x41ebfe VBROADCASTSD %XMM2,%YMM18 |
0x41ec04 VBROADCASTSD %XMM7,%YMM24 |
0x41ec0a VBROADCASTSD %XMM1,%YMM1 |
0x41ec0f VBROADCASTSD %XMM5,%YMM5 |
0x41ec14 VBROADCASTSD %XMM4,%YMM25 |
0x41ec1a VPBROADCASTQ %RDX,%YMM10 |
0x41ec20 VBROADCASTSD %XMM6,%YMM17 |
0x41ec26 VBROADCASTSD %XMM3,%YMM11 |
0x41ec2b VBROADCASTSD %XMM0,%YMM13 |
0x41ec30 VBROADCASTSD %XMM14,%YMM14 |
0x41ec35 VBROADCASTSD %XMM22,%YMM6 |
0x41ec3b VBROADCASTSD %XMM21,%YMM3 |
0x41ec41 VBROADCASTSD %XMM9,%YMM9 |
0x41ec46 VBROADCASTSD %XMM8,%YMM8 |
0x41ec4b VBROADCASTSD %XMM12,%YMM12 |
0x41ec50 VBROADCASTSD %XMM20,%YMM7 |
0x41ec56 VBROADCASTSD %XMM15,%YMM15 |
0x41ec5b VBROADCASTSD %XMM16,%YMM16 |
0x41ec61 VXORPD %XMM19,%XMM19,%XMM19 |
0x41ec67 XOR %ESI,%ESI |
0x41ec69 VPBROADCASTD 0x79692(%RIP),%XMM0 |
0x41ec72 NOPW %CS:(%RAX,%RAX,1) |
(300) 0x41ec80 VMULPD (%R9,%RSI,8),%YMM18,%YMM2 |
(300) 0x41ec87 VCVTTPD2DQ %YMM2,%XMM4 |
(300) 0x41ec8b KXNORW %K0,%K0,%K1 |
(300) 0x41ec8f VXORPD %XMM20,%XMM20,%XMM20 |
(300) 0x41ec95 VGATHERDPD (%RDX,%XMM4,8),%YMM20{%K1} |
(300) 0x41ec9c KXNORW %K0,%K0,%K1 |
(300) 0x41eca0 VXORPD %XMM21,%XMM21,%XMM21 |
(300) 0x41eca6 VGATHERDPD 0x8(%RDX,%XMM4,8),%YMM21{%K1} |
(300) 0x41ecae KXNORW %K0,%K0,%K1 |
(300) 0x41ecb2 VXORPD %XMM22,%XMM22,%XMM22 |
(300) 0x41ecb8 VGATHERDPD 0x10(%RDX,%XMM4,8),%YMM22{%K1} |
(300) 0x41ecc0 VRNDSCALEPD $0xb,%YMM2,%YMM23 |
(300) 0x41ecc7 VSUBPD %YMM23,%YMM2,%YMM2 |
(300) 0x41eccd VPADDD %XMM0,%XMM4,%XMM4 |
(300) 0x41ecd1 KXNORW %K0,%K0,%K1 |
(300) 0x41ecd5 VXORPD %XMM23,%XMM23,%XMM23 |
(300) 0x41ecdb VGATHERDPD (%RDX,%XMM4,8),%YMM23{%K1} |
(300) 0x41ece2 VMOVAPD %YMM2,%YMM4 |
(300) 0x41ece6 VFMADD213PD %YMM1,%YMM24,%YMM4 |
(300) 0x41ecec VFMADD213PD %YMM5,%YMM2,%YMM4 |
(300) 0x41ecf1 VFMADD213PD %YMM25,%YMM2,%YMM4 |
(300) 0x41ecf7 VFMADD213PD %YMM19,%YMM20,%YMM4 |
(300) 0x41ecfd VMOVAPD %YMM2,%YMM19 |
(300) 0x41ed03 VFMADD213PD %YMM11,%YMM17,%YMM19 |
(300) 0x41ed09 VFMADD213PD %YMM13,%YMM2,%YMM19 |
(300) 0x41ed0f VFMADD213PD %YMM14,%YMM2,%YMM19 |
(300) 0x41ed15 VFMADD213PD %YMM4,%YMM21,%YMM19 |
(300) 0x41ed1b VMOVAPD %YMM2,%YMM4 |
(300) 0x41ed1f VFMADD213PD %YMM3,%YMM6,%YMM4 |
(300) 0x41ed24 VFMADD213PD %YMM9,%YMM2,%YMM4 |
(300) 0x41ed29 VFMADD213PD %YMM8,%YMM2,%YMM4 |
(300) 0x41ed2e VFMADD213PD %YMM19,%YMM22,%YMM4 |
(300) 0x41ed34 VMOVAPD %YMM2,%YMM19 |
(300) 0x41ed3a VFMADD213PD %YMM7,%YMM12,%YMM19 |
(300) 0x41ed40 VFMADD213PD %YMM15,%YMM2,%YMM19 |
(300) 0x41ed46 VFMADD213PD %YMM16,%YMM2,%YMM19 |
(300) 0x41ed4c VFMADD213PD %YMM4,%YMM23,%YMM19 |
(300) 0x41ed52 ADD $0x4,%RSI |
(300) 0x41ed56 CMP %RAX,%RSI |
(300) 0x41ed59 JB 41ec80 |
0x41ed5f VEXTRACTF32X4 $0x1,%YMM19,%XMM0 |
0x41ed66 VADDPD %XMM0,%XMM19,%XMM0 |
0x41ed6c VPERMILPD $0x1,%XMM0,%XMM2 |
0x41ed72 VADDSD %XMM2,%XMM0,%XMM0 |
0x41ed76 CMP %RCX,%RAX |
0x41ed79 JE 41ef6a |
0x41ed7f VPBROADCASTQ %RCX,%YMM19 |
0x41ed85 JMP 41ee38 |
0x41ed8a XOR %R12D,%R12D |
0x41ed8d XOR %EAX,%EAX |
0x41ed8f SUB %EDX,%ESI |
0x41ed91 LEA (%R8,%R10,8),%RDX |
0x41ed95 JMP 41edac |
0x41ed97 NOPW (%RAX,%RAX,1) |
(301) 0x41eda0 INC %R12 |
(301) 0x41eda3 CMP %R12,%R11 |
(301) 0x41eda6 JE 41eb7c |
(301) 0x41edac VMOVSD (%RDX,%R12,8),%XMM2 |
(301) 0x41edb2 VUCOMISD %XMM2,%XMM1 |
(301) 0x41edb6 JBE 41eda0 |
(301) 0x41edb8 CMP %R12D,%ESI |
(301) 0x41edbb JE 41eda0 |
(301) 0x41edbd CLTQ |
(301) 0x41edbf VMOVSD %XMM2,(%R9,%RAX,8) |
(301) 0x41edc5 INC %EAX |
(301) 0x41edc7 JMP 41eda0 |
0x41edc9 VPBROADCASTQ %RCX,%YMM19 |
0x41edcf VBROADCASTSD %XMM2,%YMM18 |
0x41edd5 VBROADCASTSD %XMM7,%YMM24 |
0x41eddb VBROADCASTSD %XMM1,%YMM1 |
0x41ede0 VBROADCASTSD %XMM5,%YMM5 |
0x41ede5 VBROADCASTSD %XMM4,%YMM25 |
0x41edeb VPBROADCASTQ %RDX,%YMM10 |
0x41edf1 VBROADCASTSD %XMM6,%YMM17 |
0x41edf7 VBROADCASTSD %XMM3,%YMM11 |
0x41edfc VBROADCASTSD %XMM0,%YMM13 |
0x41ee01 VBROADCASTSD %XMM14,%YMM14 |
0x41ee06 VBROADCASTSD %XMM22,%YMM6 |
0x41ee0c VBROADCASTSD %XMM21,%YMM3 |
0x41ee12 VBROADCASTSD %XMM9,%YMM9 |
0x41ee17 VBROADCASTSD %XMM8,%YMM8 |
0x41ee1c VBROADCASTSD %XMM12,%YMM12 |
0x41ee21 VBROADCASTSD %XMM20,%YMM7 |
0x41ee27 VBROADCASTSD %XMM15,%YMM15 |
0x41ee2c VBROADCASTSD %XMM16,%YMM16 |
0x41ee32 VXORPD %XMM0,%XMM0,%XMM0 |
0x41ee36 XOR %EAX,%EAX |
0x41ee38 VPBROADCASTQ %RAX,%YMM20 |
0x41ee3e VPADDQ 0x793b8(%RIP),%YMM20,%YMM20 |
0x41ee48 VPCMPLTUQ %YMM19,%YMM20,%K1 |
0x41ee4f VMOVUPD (%R9,%RAX,8),%YMM19{%K1}{z} |
0x41ee56 VMULPD %YMM19,%YMM18,%YMM18 |
0x41ee5c VCVTTPD2DQ %YMM18,%XMM19 |
0x41ee62 VRNDSCALEPD $0xb,%YMM18,%YMM20 |
0x41ee69 VSUBPD %YMM20,%YMM18,%YMM18 |
0x41ee6f VPMOVSXDQ %XMM19,%YMM20 |
0x41ee75 VPSLLQ $0x3,%YMM20,%YMM20 |
0x41ee7c VPADDQ %YMM20,%YMM10,%YMM20 |
0x41ee82 VXORPD %XMM21,%XMM21,%XMM21 |
0x41ee88 KMOVQ %K1,%K2 |
0x41ee8d VXORPD %XMM22,%XMM22,%XMM22 |
0x41ee93 VGATHERQPD (,%YMM20,1),%YMM22{%K2} |
0x41ee9e KMOVQ %K1,%K2 |
0x41eea3 VXORPD %XMM23,%XMM23,%XMM23 |
0x41eea9 VGATHERQPD 0x8(,%YMM20,1),%YMM23{%K2} |
0x41eeb4 VFMADD231PD %YMM17,%YMM18,%YMM11 |
0x41eeba KMOVQ %K1,%K2 |
0x41eebf VXORPD %XMM17,%XMM17,%XMM17 |
0x41eec5 VGATHERQPD 0x10(,%YMM20,1),%YMM17{%K2} |
0x41eed0 VFMADD213PD %YMM13,%YMM18,%YMM11 |
0x41eed6 VFMADD213PD %YMM14,%YMM18,%YMM11 |
0x41eedc VPADDD 0x7941e(%RIP){1to4},%XMM19,%XMM2 |
0x41eee6 VPMOVSXDQ %XMM2,%YMM2 |
0x41eeeb VPSLLQ $0x3,%YMM2,%YMM2 |
0x41eef0 VPADDQ %YMM2,%YMM10,%YMM2 |
0x41eef4 KMOVQ %K1,%K2 |
0x41eef9 VGATHERQPD (,%YMM2,1),%YMM21{%K2} |
0x41ef04 VMULPD %YMM23,%YMM11,%YMM2 |
0x41ef0a VFMADD231PD %YMM12,%YMM18,%YMM7 |
0x41ef10 VFMADD213PD %YMM15,%YMM18,%YMM7 |
0x41ef16 VFMADD213PD %YMM16,%YMM18,%YMM7 |
0x41ef1c VFMADD231PD %YMM24,%YMM18,%YMM1 |
0x41ef22 VFMADD213PD %YMM5,%YMM18,%YMM1 |
0x41ef28 VFMADD213PD %YMM25,%YMM18,%YMM1 |
0x41ef2e VFMADD213PD %YMM2,%YMM22,%YMM1 |
0x41ef34 VFMADD231PD %YMM6,%YMM18,%YMM3 |
0x41ef3a VFMADD213PD %YMM9,%YMM18,%YMM3 |
0x41ef40 VFMADD213PD %YMM8,%YMM18,%YMM3 |
0x41ef46 VFMADD213PD %YMM1,%YMM17,%YMM3 |
0x41ef4c VFMADD231PD %YMM21,%YMM7,%YMM3{%K1}{z} |
0x41ef52 VEXTRACTF128 $0x1,%YMM3,%XMM1 |
0x41ef58 VADDPD %XMM1,%XMM3,%XMM1 |
0x41ef5c VPERMILPD $0x1,%XMM1,%XMM2 |
0x41ef62 VADDSD %XMM2,%XMM1,%XMM1 |
0x41ef66 VADDSD %XMM1,%XMM0,%XMM0 |
0x41ef6a POP %RBX |
0x41ef6b POP %R12 |
0x41ef6d POP %R13 |
0x41ef6f POP %R14 |
0x41ef71 POP %R15 |
0x41ef73 POP %RBP |
0x41ef74 VZEROUPPER |
0x41ef77 RET |
0x41ef78 NOPL (%RAX,%RAX,1) |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►66.67+ | miniqmcreference::TwoBodyJastr[...] | TwoBodyJastrowRef.h:130 | exec |
○ | qmcplusplus::WaveFunction::rat[...] | WaveFunction.cpp:219 | exec |
○ | main.extracted.104 | refwrap.h:347 | exec |
○ | __kmp_invoke_microtask | libiomp5.so | |
○ | __kmp_fork_call | libiomp5.so | |
○ | __kmpc_fork_call | libiomp5.so | |
○ | main | miniqmc.cpp:404 | exec |
○ | __libc_init_first | libc.so.6 | |
►33.33+ | miniqmcreference::OneBodyJastr[...] | OneBodyJastrowRef.h:150 | exec |
○ | qmcplusplus::WaveFunction::rat[...] | WaveFunction.cpp:219 | exec |
○ | main.extracted.104 | refwrap.h:347 | exec |
○ | __kmp_invoke_microtask | libiomp5.so | |
○ | __kmp_fork_call | libiomp5.so | |
○ | __kmpc_fork_call | libiomp5.so | |
○ | main | miniqmc.cpp:404 | exec |
○ | __libc_init_first | libc.so.6 |
Path / |
Source file and lines | BsplineFunctor.h:228-262 |
Module | exec |
nb instructions | 170 |
nb uops | 186 |
loop length | 889 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 21 |
used ymm registers | 24 |
used zmm registers | 0 |
nb stack references | 0 |
ADD-SUB / MUL ratio | 3.00 |
micro-operation queue | 46.50 cycles |
front end | 46.50 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | |
---|---|---|---|---|---|---|---|---|
uops | 21.50 | 21.50 | 23.50 | 23.50 | 6.00 | 53.00 | 20.00 | 7.00 |
cycles | 21.50 | 21.50 | 23.50 | 23.50 | 6.00 | 53.00 | 20.00 | 7.00 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 72.61 |
Stall cycles | 28.30 |
ROB full (events) | 31.03 |
Front-end | 46.50 |
Dispatch | 53.00 |
Overall L1 | 53.00 |
all | 41% |
load | 80% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 80% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 31% |
all | 40% |
load | 21% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 100% |
add-sub | 50% |
fma | 100% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 32% |
all | 40% |
load | 32% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 100% |
add-sub | 63% |
fma | 100% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 32% |
all | 21% |
load | 36% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 36% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 16% |
all | 24% |
load | 20% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 50% |
add-sub | 22% |
fma | 50% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 19% |
all | 23% |
load | 23% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 50% |
add-sub | 28% |
fma | 50% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 18% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
PUSH %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
PUSH %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
PUSH %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
PUSH %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
PUSH %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
SUB %EDX,%ECX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VXORPD %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
TEST %ECX,%ECX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JLE 41ef6a | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOVSXD %EDX,%R10 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VMOVSD 0x8(%RDI),%XMM1 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %ECX,%R11D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV $-0x10,%R12D | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
AND %R11,%R12 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 41ed8a | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
VPBROADCASTD %ESI,%YMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
VBROADCASTSD %XMM1,%YMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VMOVQ %R9,%XMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
MOV %EDX,%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
LEA (%R8,%R10,8),%R15 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
VPXOR %XMM5,%XMM5,%XMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMOVDQU 0x79876(%RIP),%YMM8 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 0.50 |
VMOVDQU64 0x7984c(%RIP),%YMM16 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 0.50 |
VPXOR %XMM6,%XMM6,%XMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VPXOR %XMM9,%XMM9,%XMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMOVD %XMM10,%EAX | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
CMP %R11,%R12 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JNE 41ed8f | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
TEST %EAX,%EAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JLE 41ef6a | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
VMOVSD 0x238(%RDI),%XMM2 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD 0x18(%RDI),%XMM7 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD 0x20(%RDI),%XMM1 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD 0x28(%RDI),%XMM5 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD 0x30(%RDI),%XMM4 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD 0x38(%RDI),%XMM6 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD 0x40(%RDI),%XMM3 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD 0x48(%RDI),%XMM0 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD 0x50(%RDI),%XMM14 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD 0x58(%RDI),%XMM22 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD 0x60(%RDI),%XMM21 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD 0x68(%RDI),%XMM9 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD 0x70(%RDI),%XMM8 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD 0x78(%RDI),%XMM12 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD 0x80(%RDI),%XMM20 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD 0x88(%RDI),%XMM15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x218(%RDI),%RDX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD 0x90(%RDI),%XMM16 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %EAX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV $-0x4,%EAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
AND %RCX,%RAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 41edc9 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
VBROADCASTSD %XMM2,%YMM18 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM7,%YMM24 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM1,%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM5,%YMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM4,%YMM25 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VPBROADCASTQ %RDX,%YMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
VBROADCASTSD %XMM6,%YMM17 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM3,%YMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM0,%YMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM14,%YMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM22,%YMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM21,%YMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM9,%YMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM8,%YMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM12,%YMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM20,%YMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM15,%YMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM16,%YMM16 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VXORPD %XMM19,%XMM19,%XMM19 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
XOR %ESI,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VPBROADCASTD 0x79692(%RIP),%XMM0 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 1 | 0.50 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VEXTRACTF32X4 $0x1,%YMM19,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VADDPD %XMM0,%XMM19,%XMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VPERMILPD $0x1,%XMM0,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
VADDSD %XMM2,%XMM0,%XMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
CMP %RCX,%RAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 41ef6a | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
VPBROADCASTQ %RCX,%YMM19 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
JMP 41ee38 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
XOR %R12D,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
SUB %EDX,%ESI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
LEA (%R8,%R10,8),%RDX | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
JMP 41edac | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VPBROADCASTQ %RCX,%YMM19 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
VBROADCASTSD %XMM2,%YMM18 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM7,%YMM24 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM1,%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM5,%YMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM4,%YMM25 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VPBROADCASTQ %RDX,%YMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
VBROADCASTSD %XMM6,%YMM17 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM3,%YMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM0,%YMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM14,%YMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM22,%YMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM21,%YMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM9,%YMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM8,%YMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM12,%YMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM20,%YMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM15,%YMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM16,%YMM16 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VXORPD %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VPBROADCASTQ %RAX,%YMM20 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
VPADDQ 0x793b8(%RIP),%YMM20,%YMM20 | 1 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0.33 | 0 | 0 | 1 | 0.50 |
VPCMPLTUQ %YMM19,%YMM20,%K1 | |||||||||||
VMOVUPD (%R9,%RAX,8),%YMM19{%K1}{z} | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VMULPD %YMM19,%YMM18,%YMM18 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VCVTTPD2DQ %YMM18,%XMM19 | 2 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0 | 0 | 7 | 1 |
VRNDSCALEPD $0xb,%YMM18,%YMM20 | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 8 | 1 |
VSUBPD %YMM20,%YMM18,%YMM18 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VPMOVSXDQ %XMM19,%YMM20 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VPSLLQ $0x3,%YMM20,%YMM20 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VPADDQ %YMM20,%YMM10,%YMM20 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 1 | 0.33 |
VXORPD %XMM21,%XMM21,%XMM21 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
KMOVQ %K1,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VXORPD %XMM22,%XMM22,%XMM22 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VGATHERQPD (,%YMM20,1),%YMM22{%K2} | 4 | 1 | 0 | 2 | 2 | 0 | 1 | 0 | 0 | 20 | 4 |
KMOVQ %K1,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VXORPD %XMM23,%XMM23,%XMM23 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VGATHERQPD 0x8(,%YMM20,1),%YMM23{%K2} | 4 | 1 | 0 | 2 | 2 | 0 | 1 | 0 | 0 | 20 | 4 |
VFMADD231PD %YMM17,%YMM18,%YMM11 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
KMOVQ %K1,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VXORPD %XMM17,%XMM17,%XMM17 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VGATHERQPD 0x10(,%YMM20,1),%YMM17{%K2} | 4 | 1 | 0 | 2 | 2 | 0 | 1 | 0 | 0 | 20 | 4 |
VFMADD213PD %YMM13,%YMM18,%YMM11 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD213PD %YMM14,%YMM18,%YMM11 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VPADDD 0x7941e(%RIP){1to4},%XMM19,%XMM2 | 1 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0.33 | 0 | 0 | 1 | 0.50 |
VPMOVSXDQ %XMM2,%YMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VPSLLQ $0x3,%YMM2,%YMM2 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VPADDQ %YMM2,%YMM10,%YMM2 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 1 | 0.33 |
KMOVQ %K1,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VGATHERQPD (,%YMM2,1),%YMM21{%K2} | 4 | 1 | 0 | 2 | 2 | 0 | 1 | 0 | 0 | 20 | 4 |
VMULPD %YMM23,%YMM11,%YMM2 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM12,%YMM18,%YMM7 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD213PD %YMM15,%YMM18,%YMM7 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD213PD %YMM16,%YMM18,%YMM7 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM24,%YMM18,%YMM1 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD213PD %YMM5,%YMM18,%YMM1 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD213PD %YMM25,%YMM18,%YMM1 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD213PD %YMM2,%YMM22,%YMM1 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM6,%YMM18,%YMM3 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD213PD %YMM9,%YMM18,%YMM3 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD213PD %YMM8,%YMM18,%YMM3 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD213PD %YMM1,%YMM17,%YMM3 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM21,%YMM7,%YMM3{%K1}{z} | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VEXTRACTF128 $0x1,%YMM3,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VADDPD %XMM1,%XMM3,%XMM1 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VPERMILPD $0x1,%XMM1,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
VADDSD %XMM2,%XMM1,%XMM1 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDSD %XMM1,%XMM0,%XMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
POP %RBX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R12 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R13 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R14 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %RBP | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
VZEROUPPER | 4 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
RET | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 | 0 | 1 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
Source file and lines | BsplineFunctor.h:228-262 |
Module | exec |
nb instructions | 170 |
nb uops | 186 |
loop length | 889 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 21 |
used ymm registers | 24 |
used zmm registers | 0 |
nb stack references | 0 |
ADD-SUB / MUL ratio | 3.00 |
micro-operation queue | 46.50 cycles |
front end | 46.50 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | |
---|---|---|---|---|---|---|---|---|
uops | 21.50 | 21.50 | 23.50 | 23.50 | 6.00 | 53.00 | 20.00 | 7.00 |
cycles | 21.50 | 21.50 | 23.50 | 23.50 | 6.00 | 53.00 | 20.00 | 7.00 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 72.61 |
Stall cycles | 28.30 |
ROB full (events) | 31.03 |
Front-end | 46.50 |
Dispatch | 53.00 |
Overall L1 | 53.00 |
all | 41% |
load | 80% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 80% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 31% |
all | 40% |
load | 21% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 100% |
add-sub | 50% |
fma | 100% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 32% |
all | 40% |
load | 32% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 100% |
add-sub | 63% |
fma | 100% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 32% |
all | 21% |
load | 36% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 36% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 16% |
all | 24% |
load | 20% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 50% |
add-sub | 22% |
fma | 50% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 19% |
all | 23% |
load | 23% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 50% |
add-sub | 28% |
fma | 50% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 18% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
PUSH %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
PUSH %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
PUSH %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
PUSH %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
PUSH %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
SUB %EDX,%ECX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VXORPD %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
TEST %ECX,%ECX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JLE 41ef6a | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOVSXD %EDX,%R10 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VMOVSD 0x8(%RDI),%XMM1 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %ECX,%R11D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV $-0x10,%R12D | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
AND %R11,%R12 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 41ed8a | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
VPBROADCASTD %ESI,%YMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
VBROADCASTSD %XMM1,%YMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VMOVQ %R9,%XMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
MOV %EDX,%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
LEA (%R8,%R10,8),%R15 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
VPXOR %XMM5,%XMM5,%XMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMOVDQU 0x79876(%RIP),%YMM8 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 0.50 |
VMOVDQU64 0x7984c(%RIP),%YMM16 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 0.50 |
VPXOR %XMM6,%XMM6,%XMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VPXOR %XMM9,%XMM9,%XMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMOVD %XMM10,%EAX | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
CMP %R11,%R12 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JNE 41ed8f | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
TEST %EAX,%EAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JLE 41ef6a | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
VMOVSD 0x238(%RDI),%XMM2 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD 0x18(%RDI),%XMM7 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD 0x20(%RDI),%XMM1 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD 0x28(%RDI),%XMM5 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD 0x30(%RDI),%XMM4 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD 0x38(%RDI),%XMM6 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD 0x40(%RDI),%XMM3 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD 0x48(%RDI),%XMM0 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD 0x50(%RDI),%XMM14 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD 0x58(%RDI),%XMM22 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD 0x60(%RDI),%XMM21 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD 0x68(%RDI),%XMM9 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD 0x70(%RDI),%XMM8 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD 0x78(%RDI),%XMM12 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD 0x80(%RDI),%XMM20 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD 0x88(%RDI),%XMM15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x218(%RDI),%RDX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD 0x90(%RDI),%XMM16 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %EAX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV $-0x4,%EAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
AND %RCX,%RAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 41edc9 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
VBROADCASTSD %XMM2,%YMM18 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM7,%YMM24 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM1,%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM5,%YMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM4,%YMM25 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VPBROADCASTQ %RDX,%YMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
VBROADCASTSD %XMM6,%YMM17 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM3,%YMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM0,%YMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM14,%YMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM22,%YMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM21,%YMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM9,%YMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM8,%YMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM12,%YMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM20,%YMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM15,%YMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM16,%YMM16 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VXORPD %XMM19,%XMM19,%XMM19 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
XOR %ESI,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VPBROADCASTD 0x79692(%RIP),%XMM0 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 1 | 0.50 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VEXTRACTF32X4 $0x1,%YMM19,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VADDPD %XMM0,%XMM19,%XMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VPERMILPD $0x1,%XMM0,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
VADDSD %XMM2,%XMM0,%XMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
CMP %RCX,%RAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 41ef6a | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
VPBROADCASTQ %RCX,%YMM19 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
JMP 41ee38 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
XOR %R12D,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
SUB %EDX,%ESI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
LEA (%R8,%R10,8),%RDX | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
JMP 41edac | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VPBROADCASTQ %RCX,%YMM19 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
VBROADCASTSD %XMM2,%YMM18 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM7,%YMM24 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM1,%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM5,%YMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM4,%YMM25 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VPBROADCASTQ %RDX,%YMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
VBROADCASTSD %XMM6,%YMM17 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM3,%YMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM0,%YMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM14,%YMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM22,%YMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM21,%YMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM9,%YMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM8,%YMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM12,%YMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM20,%YMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM15,%YMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM16,%YMM16 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VXORPD %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VPBROADCASTQ %RAX,%YMM20 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
VPADDQ 0x793b8(%RIP),%YMM20,%YMM20 | 1 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0.33 | 0 | 0 | 1 | 0.50 |
VPCMPLTUQ %YMM19,%YMM20,%K1 | |||||||||||
VMOVUPD (%R9,%RAX,8),%YMM19{%K1}{z} | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VMULPD %YMM19,%YMM18,%YMM18 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VCVTTPD2DQ %YMM18,%XMM19 | 2 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0 | 0 | 7 | 1 |
VRNDSCALEPD $0xb,%YMM18,%YMM20 | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 8 | 1 |
VSUBPD %YMM20,%YMM18,%YMM18 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VPMOVSXDQ %XMM19,%YMM20 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VPSLLQ $0x3,%YMM20,%YMM20 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VPADDQ %YMM20,%YMM10,%YMM20 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 1 | 0.33 |
VXORPD %XMM21,%XMM21,%XMM21 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
KMOVQ %K1,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VXORPD %XMM22,%XMM22,%XMM22 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VGATHERQPD (,%YMM20,1),%YMM22{%K2} | 4 | 1 | 0 | 2 | 2 | 0 | 1 | 0 | 0 | 20 | 4 |
KMOVQ %K1,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VXORPD %XMM23,%XMM23,%XMM23 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VGATHERQPD 0x8(,%YMM20,1),%YMM23{%K2} | 4 | 1 | 0 | 2 | 2 | 0 | 1 | 0 | 0 | 20 | 4 |
VFMADD231PD %YMM17,%YMM18,%YMM11 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
KMOVQ %K1,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VXORPD %XMM17,%XMM17,%XMM17 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VGATHERQPD 0x10(,%YMM20,1),%YMM17{%K2} | 4 | 1 | 0 | 2 | 2 | 0 | 1 | 0 | 0 | 20 | 4 |
VFMADD213PD %YMM13,%YMM18,%YMM11 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD213PD %YMM14,%YMM18,%YMM11 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VPADDD 0x7941e(%RIP){1to4},%XMM19,%XMM2 | 1 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0.33 | 0 | 0 | 1 | 0.50 |
VPMOVSXDQ %XMM2,%YMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VPSLLQ $0x3,%YMM2,%YMM2 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VPADDQ %YMM2,%YMM10,%YMM2 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 1 | 0.33 |
KMOVQ %K1,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VGATHERQPD (,%YMM2,1),%YMM21{%K2} | 4 | 1 | 0 | 2 | 2 | 0 | 1 | 0 | 0 | 20 | 4 |
VMULPD %YMM23,%YMM11,%YMM2 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM12,%YMM18,%YMM7 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD213PD %YMM15,%YMM18,%YMM7 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD213PD %YMM16,%YMM18,%YMM7 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM24,%YMM18,%YMM1 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD213PD %YMM5,%YMM18,%YMM1 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD213PD %YMM25,%YMM18,%YMM1 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD213PD %YMM2,%YMM22,%YMM1 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM6,%YMM18,%YMM3 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD213PD %YMM9,%YMM18,%YMM3 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD213PD %YMM8,%YMM18,%YMM3 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD213PD %YMM1,%YMM17,%YMM3 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM21,%YMM7,%YMM3{%K1}{z} | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VEXTRACTF128 $0x1,%YMM3,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VADDPD %XMM1,%XMM3,%XMM1 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VPERMILPD $0x1,%XMM1,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
VADDSD %XMM2,%XMM1,%XMM1 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDSD %XMM1,%XMM0,%XMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
POP %RBX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R12 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R13 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R14 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %RBP | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
VZEROUPPER | 4 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
RET | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 | 0 | 1 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼qmcplusplus::BsplineFunctor | 1.56 | 0.01 |
○Loop 302 - BsplineFunctor.h:236-241 - exec | 1.04 | 0.01 |
○Loop 301 - BsplineFunctor.h:236-241 - exec | 0 | 0 |
○Loop 300 - BsplineFunctor.h:246-260 - exec | 0 | 0 |