Loop Id: 126 | Module: exec | Source: advec_cell_kernel.f90:83-157 [...] | Coverage: 0.01% |
---|
Loop Id: 126 | Module: exec | Source: advec_cell_kernel.f90:83-157 [...] | Coverage: 0.01% |
---|
0x428870 MOV 0x1a0(%RSP),%RAX |
0x428878 LEA (%RAX,%R11,1),%R14 |
0x42887c SUB 0x18(%RSP),%R14 |
0x428881 MOV %R15,%RAX |
0x428884 IMUL %R14,%RAX |
0x428888 MOV %RAX,0x280(%RSP) |
0x428890 MOV 0x30(%RSP),%RAX |
0x428895 IMUL %R14,%RAX |
0x428899 VPBROADCASTQ %RAX,%YMM13 |
0x42889f MOV 0x8(%RSP),%RAX |
0x4288a4 IMUL %R14,%RAX |
0x4288a8 VPBROADCASTQ %RAX,%YMM1 |
0x4288ae MOV %R13,%RAX |
0x4288b1 IMUL %R14,%RAX |
0x4288b5 MOV %RAX,0x28(%RSP) |
0x4288ba MOV (%RSP),%RAX |
0x4288be IMUL %R14,%RAX |
0x4288c2 VPBROADCASTQ %RAX,%YMM2 |
0x4288c8 VMOVDQA %YMM2,0x2a0(%RSP) |
0x4288d1 IMUL %R12,%R14 |
0x4288d5 XOR %EAX,%EAX |
0x4288d7 VPADDQ 0x360(%RSP),%YMM1,%YMM2 |
0x4288e0 VPADDQ 0x3a0(%RSP),%YMM13,%YMM1 |
0x4288e9 VPBROADCASTQ %RAX,%YMM3 |
0x4288ef VMOVDQA 0x3c0(%RSP),%YMM5 |
0x4288f8 VPSUBQ %YMM3,%YMM5,%YMM13 |
0x4288fc VPCMPNLEUQ 0xcf839(%RIP),%YMM13,%K1 |
0x428907 MOV 0x280(%RSP),%RCX |
0x42890f ADD 0x140(%RSP),%RCX |
0x428917 MOV 0x260(%RSP),%RSI |
0x42891f LEA (%RAX,%RSI,1),%RBX |
0x428923 SUB 0x60(%RSP),%RBX |
0x428928 VMOVUPD (%RCX,%RBX,8),%YMM13{%K1}{z} |
0x42892f VMOVAPD %YMM13,%YMM31{%K1} |
0x428935 VCMPPD $0x1,%YMM31,%YMM12,%K2{%K1} |
0x42893c VPBROADCASTD %EAX,%XMM13 |
0x428942 VPOR 0xd0d66(%RIP),%XMM13,%XMM13 |
0x42894a VPADDD 0x200(%RSP),%XMM13,%XMM5 |
0x428953 VPMINSD %XMM0,%XMM5,%XMM5 |
0x428958 VPADDQ %YMM3,%YMM4,%YMM3 |
0x42895c VPMOVQD %YMM3,%XMM3 |
0x428962 VPADDD 0x2f0(%RSP),%XMM13,%XMM7 |
0x42896b VPBLENDMD %XMM3,%XMM7,%XMM8{%K2} |
0x428971 VMOVDQA %XMM5,%XMM9 |
0x428975 VPADDD 0x2e0(%RSP),%XMM13,%XMM9{%K2} |
0x42897d VMOVDQA32 %XMM7,%XMM5{%K2} |
0x428983 VMOVDQA32 %XMM7,%XMM3{%K2} |
0x428989 VANDPD %YMM14,%YMM31,%YMM7 |
0x42898f VPMOVSXDQ %XMM3,%YMM3 |
0x428994 VPSUBQ %YMM6,%YMM3,%YMM3 |
0x428998 VPSLLQ $0x3,%YMM3,%YMM3 |
0x42899d VPADDQ %YMM3,%YMM1,%YMM1 |
0x4289a1 KMOVQ %K1,%K2 |
0x4289a6 VPXOR %XMM13,%XMM13,%XMM13 |
0x4289ab VGATHERQPD (,%YMM1,1),%YMM13{%K2} |
0x4289b6 VMOVAPD %YMM13,%YMM30{%K1} |
0x4289bc MOV 0x98(%RSP),%RAX |
0x4289c4 VMOVUPD (%RAX,%RBX,8),%YMM1{%K1}{z} |
0x4289cb VPMOVSXDQ %XMM5,%YMM5 |
0x4289d0 VPSUBQ %YMM6,%YMM5,%YMM5 |
0x4289d4 VPSLLQ $0x3,%YMM5,%YMM5 |
0x4289d9 VPADDQ 0x380(%RSP),%YMM5,%YMM5 |
0x4289e2 KMOVQ %K1,%K2 |
0x4289e7 VXORPD %XMM13,%XMM13,%XMM13 |
0x4289ec VGATHERQPD (,%YMM5,1),%YMM13{%K2} |
0x4289f7 VDIVPD %YMM30,%YMM7,%YMM5 |
0x4289fd VMOVAPD %YMM1,%YMM29{%K1} |
0x428a03 VMOVAPD %YMM29,%YMM1 |
0x428a09 VFMADD213PD %YMM29,%YMM5,%YMM1 |
0x428a0f VMOVAPD %YMM13,%YMM28{%K1} |
0x428a15 VDIVPD %YMM28,%YMM1,%YMM1 |
0x428a1b VPADDQ %YMM3,%YMM2,%YMM7 |
0x428a1f KMOVQ %K1,%K2 |
0x428a24 VXORPD %XMM10,%XMM10,%XMM10 |
0x428a29 VGATHERQPD (,%YMM7,1),%YMM10{%K2} |
0x428a34 VSUBPD %YMM5,%YMM16,%YMM13 |
0x428a3a VMOVAPD %YMM10,%YMM27{%K1} |
0x428a40 VPMOVSXDQ %XMM9,%YMM7 |
0x428a45 VPSUBQ %YMM6,%YMM7,%YMM7 |
0x428a49 VPSLLQ $0x3,%YMM7,%YMM7 |
0x428a4e VPADDQ %YMM7,%YMM2,%YMM9 |
0x428a52 KMOVQ %K1,%K2 |
0x428a57 VXORPD %XMM10,%XMM10,%XMM10 |
0x428a5c VGATHERQPD (,%YMM9,1),%YMM10{%K2} |
0x428a67 VMOVAPD %YMM10,%YMM26{%K1} |
0x428a6d VSUBPD %YMM26,%YMM27,%YMM9 |
0x428a73 VPMOVSXDQ %XMM8,%YMM8 |
0x428a78 VPSUBQ %YMM6,%YMM8,%YMM8 |
0x428a7c VPSLLQ $0x3,%YMM8,%YMM8 |
0x428a82 VPADDQ %YMM2,%YMM8,%YMM2 |
0x428a86 KMOVQ %K1,%K2 |
0x428a8b VXORPD %XMM10,%XMM10,%XMM10 |
0x428a90 VGATHERQPD (,%YMM2,1),%YMM10{%K2} |
0x428a9b VMOVAPD %YMM10,%YMM11{%K1} |
0x428aa1 VSUBPD %YMM27,%YMM11,%YMM2 |
0x428aa7 VMULPD %YMM2,%YMM9,%YMM10 |
0x428aab VCMPPD $0x1,%YMM10,%YMM12,%K2{%K1} |
0x428ab2 VANDPD %YMM14,%YMM9,%YMM9 |
0x428ab7 VANDPD %YMM2,%YMM14,%YMM10 |
0x428abb VMULPD %YMM1,%YMM9,%YMM22 |
0x428ac1 VFMADD231PD %YMM13,%YMM10,%YMM22 |
0x428ac7 VMULPD %YMM18,%YMM22,%YMM22 |
0x428acd VCMPPD $0x2,%YMM22,%YMM10,%K3 |
0x428ad4 VMOVAPD %YMM10,%YMM22{%K3} |
0x428ada VCMPPD $0x2,%YMM12,%YMM2,%K3 |
0x428ae1 VSUBPD %YMM5,%YMM15,%YMM2 |
0x428ae5 VXORPD %YMM17,%YMM2,%YMM2{%K3} |
0x428aeb VCMPPD $0x2,%YMM22,%YMM9,%K3 |
0x428af2 VMOVAPD %YMM9,%YMM22{%K3} |
0x428af8 VMOVAPD %YMM27,%YMM5 |
0x428afe VFMADD231PD %YMM2,%YMM22,%YMM5{%K2} |
0x428b04 VMOVDQA 0x2a0(%RSP),%YMM2 |
0x428b0d VPADDQ 0x340(%RSP),%YMM2,%YMM2 |
0x428b16 VMULPD %YMM31,%YMM5,%YMM5 |
0x428b1c MOV 0x28(%RSP),%RAX |
0x428b21 ADD 0x88(%RSP),%RAX |
0x428b29 VMOVUPD %YMM5,(%RAX,%RBX,8){%K1} |
0x428b30 VPADDQ %YMM3,%YMM2,%YMM3 |
0x428b34 KMOVQ %K1,%K2 |
0x428b39 VXORPD %XMM9,%XMM9,%XMM9 |
0x428b3e VGATHERQPD (,%YMM3,1),%YMM9{%K2} |
0x428b49 VPADDQ %YMM7,%YMM2,%YMM3 |
0x428b4d KMOVQ %K1,%K2 |
0x428b52 VPXOR %XMM7,%XMM7,%XMM7 |
0x428b56 VGATHERQPD (,%YMM3,1),%YMM7{%K2} |
0x428b61 VMOVAPD %YMM9,%YMM24{%K1} |
0x428b67 VMOVAPD %YMM7,%YMM25{%K1} |
0x428b6d VSUBPD %YMM25,%YMM24,%YMM3 |
0x428b73 VPADDQ %YMM2,%YMM8,%YMM2 |
0x428b77 KMOVQ %K1,%K2 |
0x428b7c VXORPD %XMM7,%XMM7,%XMM7 |
0x428b80 VGATHERQPD (,%YMM2,1),%YMM7{%K2} |
0x428b8b VMOVAPD %YMM7,%YMM23{%K1} |
0x428b91 VSUBPD %YMM24,%YMM23,%YMM2 |
0x428b97 VMULPD %YMM3,%YMM2,%YMM7 |
0x428b9b VCMPPD $0x1,%YMM7,%YMM12,%K2{%K1} |
0x428ba2 VANDPD %YMM5,%YMM14,%YMM7 |
0x428ba6 VMULPD %YMM30,%YMM27,%YMM8 |
0x428bac VDIVPD %YMM8,%YMM7,%YMM7 |
0x428bb1 VANDPD %YMM3,%YMM14,%YMM3 |
0x428bb5 VANDPD %YMM2,%YMM14,%YMM8 |
0x428bb9 VMULPD %YMM1,%YMM3,%YMM1 |
0x428bbd VFMADD231PD %YMM13,%YMM8,%YMM1 |
0x428bc2 VMULPD %YMM18,%YMM1,%YMM1 |
0x428bc8 VCMPPD $0x2,%YMM1,%YMM8,%K3 |
0x428bcf VMOVAPD %YMM8,%YMM1{%K3} |
0x428bd5 VCMPPD $0x2,%YMM12,%YMM2,%K3 |
0x428bdc VSUBPD %YMM7,%YMM15,%YMM2 |
0x428be0 VXORPD %YMM17,%YMM2,%YMM2{%K3} |
0x428be6 VCMPPD $0x2,%YMM1,%YMM3,%K3 |
0x428bed VMOVAPD %YMM3,%YMM1{%K3} |
0x428bf3 VMOVAPD %YMM24,%YMM3 |
0x428bf9 VFMADD231PD %YMM2,%YMM1,%YMM3{%K2} |
0x428bff VMULPD %YMM5,%YMM3,%YMM1 |
0x428c03 ADD 0xa0(%RSP),%R14 |
0x428c0b VMOVUPD %YMM1,(%R14,%RBX,8){%K1} |
0x428c12 LEA 0x1(%R11),%RAX |
0x428c16 ADD %R12,%RDI |
0x428c19 ADD %R13,%R9 |
0x428c1c ADD %R15,%R10 |
0x428c1f CMP 0x20(%RSP),%R11 |
0x428c24 MOV %RAX,%R11 |
0x428c27 JE 4263e4 |
0x428c2d TEST %R8,%R8 |
0x428c30 JE 428870 |
0x428c36 VMOVAPS %YMM30,0x160(%RSP) |
0x428c3e VMOVAPS %YMM29,0x1c0(%RSP) |
0x428c46 VMOVAPS %YMM28,0x1e0(%RSP) |
0x428c4e VMOVAPS %YMM26,0x100(%RSP) |
0x428c56 VMOVAPD %YMM11,%YMM30 |
0x428c5c VMOVAPD %YMM25,%YMM11 |
0x428c62 VMOVAPD %YMM23,%YMM29 |
0x428c68 MOV 0x1a0(%RSP),%RAX |
0x428c70 MOV %R11,0x180(%RSP) |
0x428c78 ADD %R11,%RAX |
0x428c7b SUB 0x18(%RSP),%RAX |
0x428c80 IMUL %RAX,%R15 |
0x428c84 MOV %R15,0x280(%RSP) |
0x428c8c MOV 0x30(%RSP),%RBX |
0x428c91 IMUL %RAX,%RBX |
0x428c95 VPBROADCASTQ %RBX,%YMM13 |
0x428c9b MOV 0x8(%RSP),%R15 |
0x428ca0 IMUL %RAX,%R15 |
0x428ca4 VPBROADCASTQ %R15,%YMM1 |
0x428caa IMUL %RAX,%R13 |
0x428cae MOV %R13,0x28(%RSP) |
0x428cb3 MOV (%RSP),%R14 |
0x428cb7 IMUL %RAX,%R14 |
0x428cbb VPBROADCASTQ %R14,%YMM2 |
0x428cc1 VMOVDQA %YMM2,0x2a0(%RSP) |
0x428cca IMUL %R12,%RAX |
0x428cce MOV %RAX,0x38(%RSP) |
0x428cd3 XOR %EAX,%EAX |
0x428cd5 MOV 0xb8(%RSP),%R8 |
0x428cdd MOV %R10,0x58(%RSP) |
0x428ce2 MOV 0xb0(%RSP),%R10 |
0x428cea MOV 0x48(%RSP),%R12 |
0x428cef MOV 0x98(%RSP),%RSI |
0x428cf7 MOV %R9,%R11 |
0x428cfa MOV %RDI,%R9 |
0x428cfd MOV %RDX,%RDI |
0x428d00 MOV 0x310(%RSP),%RDX |
0x428d08 MOV 0x318(%RSP),%RCX |
(127) 0x428d10 MOV 0x58(%RSP),%R13 |
(127) 0x428d15 VMOVUPD (%R13,%RAX,8),%YMM2 |
(127) 0x428d1c VCMPPD $0x1,%YMM2,%YMM12,%K1 |
(127) 0x428d23 LEA (%RDX,%RAX,1),%R13D |
(127) 0x428d27 VPBROADCASTD %R13D,%XMM5 |
(127) 0x428d2d VPBROADCASTQ %RAX,%YMM3 |
(127) 0x428d33 VPADDQ %YMM3,%YMM4,%YMM3 |
(127) 0x428d37 VPMOVQD %YMM3,%XMM3 |
(127) 0x428d3d VPADDD %XMM20,%XMM5,%XMM7 |
(127) 0x428d43 VPBLENDMD %XMM3,%XMM7,%XMM8{%K1} |
(127) 0x428d49 VMOVDQA32 %XMM7,%XMM3{%K1} |
(127) 0x428d4f VPMOVSXDQ %XMM3,%YMM3 |
(127) 0x428d54 VPSUBQ %YMM6,%YMM3,%YMM9 |
(127) 0x428d58 LEA (%R12,%RBX,1),%R13 |
(127) 0x428d5c VPXOR %XMM3,%XMM3,%XMM3 |
(127) 0x428d60 KXNORW %K0,%K0,%K2 |
(127) 0x428d64 VGATHERQPD (%R13,%YMM9,8),%YMM3{%K2} |
(127) 0x428d6c VPADDD %XMM19,%XMM5,%XMM10 |
(127) 0x428d72 VPMINSD %XMM0,%XMM10,%XMM10 |
(127) 0x428d77 VMOVDQA64 %XMM10,%XMM22 |
(127) 0x428d7d VMOVDQA32 %XMM7,%XMM10{%K1} |
(127) 0x428d83 VPMOVSXDQ %XMM10,%YMM7 |
(127) 0x428d88 VPSUBQ %YMM6,%YMM7,%YMM7 |
(127) 0x428d8c VPXOR %XMM10,%XMM10,%XMM10 |
(127) 0x428d91 KXNORW %K0,%K0,%K2 |
(127) 0x428d95 VGATHERQPD (%RSI,%YMM7,8),%YMM10{%K2} |
(127) 0x428d9c VPADDD %XMM21,%XMM5,%XMM22{%K1} |
(127) 0x428da2 LEA (%R8,%R15,1),%R13 |
(127) 0x428da6 VPXOR %XMM5,%XMM5,%XMM5 |
(127) 0x428daa KXNORW %K0,%K0,%K1 |
(127) 0x428dae VGATHERQPD (%R13,%YMM9,8),%YMM5{%K1} |
(127) 0x428db6 VANDPD %YMM2,%YMM14,%YMM7 |
(127) 0x428dba VMOVUPD (%RDI,%RAX,8),%YMM23 |
(127) 0x428dc1 VPMOVSXDQ %XMM22,%YMM22 |
(127) 0x428dc7 VPSUBQ %YMM6,%YMM22,%YMM22 |
(127) 0x428dcd VXORPD %XMM25,%XMM25,%XMM25 |
(127) 0x428dd3 KXNORW %K0,%K0,%K1 |
(127) 0x428dd7 VGATHERQPD (%R13,%YMM22,8),%YMM25{%K1} |
(127) 0x428ddf VDIVPD %YMM3,%YMM7,%YMM7 |
(127) 0x428de3 VFMADD213PD %YMM23,%YMM7,%YMM23 |
(127) 0x428de9 VDIVPD %YMM10,%YMM23,%YMM10 |
(127) 0x428def VPMOVSXDQ %XMM8,%YMM8 |
(127) 0x428df4 VPSUBQ %YMM6,%YMM8,%YMM8 |
(127) 0x428df8 VXORPD %XMM23,%XMM23,%XMM23 |
(127) 0x428dfe KXNORW %K0,%K0,%K1 |
(127) 0x428e02 VGATHERQPD (%R13,%YMM8,8),%YMM23{%K1} |
(127) 0x428e0a VSUBPD %YMM7,%YMM16,%YMM26 |
(127) 0x428e10 VSUBPD %YMM25,%YMM5,%YMM25 |
(127) 0x428e16 VSUBPD %YMM5,%YMM23,%YMM23 |
(127) 0x428e1c VMULPD %YMM25,%YMM23,%YMM28 |
(127) 0x428e22 VCMPPD $0x1,%YMM28,%YMM12,%K1 |
(127) 0x428e29 VSUBPD %YMM7,%YMM15,%YMM7 |
(127) 0x428e2d VCMPPD $0x2,%YMM12,%YMM23,%K2 |
(127) 0x428e34 VXORPD %YMM17,%YMM7,%YMM7{%K2} |
(127) 0x428e3a VANDPD %YMM14,%YMM25,%YMM25 |
(127) 0x428e40 VANDPD %YMM14,%YMM23,%YMM23 |
(127) 0x428e46 VMULPD %YMM10,%YMM25,%YMM28 |
(127) 0x428e4c VFMADD231PD %YMM26,%YMM23,%YMM28 |
(127) 0x428e52 VMULPD %YMM18,%YMM28,%YMM28 |
(127) 0x428e58 VCMPPD $0x2,%YMM28,%YMM23,%K2 |
(127) 0x428e5f VMOVAPD %YMM23,%YMM28{%K2} |
(127) 0x428e65 VCMPPD $0x2,%YMM28,%YMM25,%K2 |
(127) 0x428e6c VMOVAPD %YMM25,%YMM28{%K2} |
(127) 0x428e72 VMOVAPD %YMM5,%YMM23 |
(127) 0x428e78 VFMADD231PD %YMM7,%YMM28,%YMM23{%K1} |
(127) 0x428e7e VMULPD %YMM2,%YMM23,%YMM2 |
(127) 0x428e84 VMOVUPD %YMM2,(%R11,%RAX,8) |
(127) 0x428e8a LEA (%R10,%R14,1),%R13 |
(127) 0x428e8e VXORPD %XMM7,%XMM7,%XMM7 |
(127) 0x428e92 KXNORW %K0,%K0,%K1 |
(127) 0x428e96 VGATHERQPD (%R13,%YMM9,8),%YMM7{%K1} |
(127) 0x428e9e VXORPD %XMM9,%XMM9,%XMM9 |
(127) 0x428ea3 KXNORW %K0,%K0,%K1 |
(127) 0x428ea7 VGATHERQPD (%R13,%YMM22,8),%YMM9{%K1} |
(127) 0x428eaf VXORPD %XMM22,%XMM22,%XMM22 |
(127) 0x428eb5 KXNORW %K0,%K0,%K1 |
(127) 0x428eb9 VGATHERQPD (%R13,%YMM8,8),%YMM22{%K1} |
(127) 0x428ec1 VMULPD %YMM3,%YMM5,%YMM3 |
(127) 0x428ec5 VSUBPD %YMM9,%YMM7,%YMM5 |
(127) 0x428eca VSUBPD %YMM7,%YMM22,%YMM8 |
(127) 0x428ed0 VMULPD %YMM5,%YMM8,%YMM9 |
(127) 0x428ed4 VCMPPD $0x1,%YMM9,%YMM12,%K1 |
(127) 0x428edb VANDPD %YMM2,%YMM14,%YMM9 |
(127) 0x428edf VDIVPD %YMM3,%YMM9,%YMM3 |
(127) 0x428ee3 VSUBPD %YMM3,%YMM15,%YMM3 |
(127) 0x428ee7 VCMPPD $0x2,%YMM12,%YMM8,%K2 |
(127) 0x428eee VXORPD %YMM17,%YMM3,%YMM3{%K2} |
(127) 0x428ef4 VANDPD %YMM5,%YMM14,%YMM5 |
(127) 0x428ef8 VANDPD %YMM14,%YMM8,%YMM8 |
(127) 0x428efd VMULPD %YMM5,%YMM10,%YMM9 |
(127) 0x428f01 VFMADD231PD %YMM26,%YMM8,%YMM9 |
(127) 0x428f07 VMULPD %YMM18,%YMM9,%YMM9 |
(127) 0x428f0d VCMPPD $0x2,%YMM9,%YMM8,%K2 |
(127) 0x428f14 VMOVAPD %YMM8,%YMM9{%K2} |
(127) 0x428f1a VCMPPD $0x2,%YMM9,%YMM5,%K2 |
(127) 0x428f21 VMOVAPD %YMM5,%YMM9{%K2} |
(127) 0x428f27 VFMADD231PD %YMM3,%YMM9,%YMM7{%K1} |
(127) 0x428f2d VMULPD %YMM2,%YMM7,%YMM2 |
(127) 0x428f31 VMOVUPD %YMM2,(%R9,%RAX,8) |
(127) 0x428f37 ADD $0x4,%RAX |
(127) 0x428f3b CMP %RCX,%RAX |
(127) 0x428f3e JBE 428d10 |
0x428f44 MOV 0x308(%RSP),%R8 |
0x428f4c MOV %R8,%RAX |
0x428f4f CMP %R8,0x328(%RSP) |
0x428f57 MOV 0x338(%RSP),%R12 |
0x428f5f MOV 0x330(%RSP),%R13 |
0x428f67 MOV 0x320(%RSP),%R15 |
0x428f6f MOV %RDI,%RDX |
0x428f72 MOV %R9,%RDI |
0x428f75 MOV %R11,%R9 |
0x428f78 MOV 0x58(%RSP),%R10 |
0x428f7d MOV 0x180(%RSP),%R11 |
0x428f85 VMOVAPD %YMM29,%YMM23 |
0x428f8b VMOVAPD %YMM11,%YMM25 |
0x428f91 VMOVAPD %YMM30,%YMM11 |
0x428f97 VMOVAPD 0x100(%RSP),%YMM26 |
0x428f9f VMOVAPD 0x1e0(%RSP),%YMM28 |
0x428fa7 VMOVAPD 0x1c0(%RSP),%YMM29 |
0x428faf VMOVAPD 0x160(%RSP),%YMM30 |
0x428fb7 MOV 0x38(%RSP),%R14 |
0x428fbc JNE 4288d7 |
0x428fc2 JMP 428c12 |
/beegfs/hackathon/users/eoseret/qaas_runs/170-861-0321/intel/CloverLeafFC/build/CloverLeafFC/CloverLeaf_ref/kernels/advec_cell_kernel.f90: 83 - 157 |
-------------------------------------------------------------------------------- |
83: IF(dir.EQ.g_xdir) THEN |
[...] |
109: DO k=y_min,y_max |
110: DO j=x_min,x_max+2 |
111: |
112: IF(vol_flux_x(j,k).GT.0.0)THEN |
[...] |
118: upwind =MIN(j+1,x_max+2) |
[...] |
124: sigmat=ABS(vol_flux_x(j,k))/pre_vol(donor,k) |
125: sigma3=(1.0_8+sigmat)*(vertexdx(j)/vertexdx(dif)) |
126: sigma4=2.0_8-sigmat |
127: |
128: sigma=sigmat |
129: sigmav=sigmat |
130: |
131: diffuw=density1(donor,k)-density1(upwind,k) |
132: diffdw=density1(downwind,k)-density1(donor,k) |
133: wind=1.0_8 |
134: IF(diffdw.LE.0.0) wind=-1.0_8 |
135: IF(diffuw*diffdw.GT.0.0)THEN |
136: limiter=(1.0_8-sigmav)*wind*MIN(ABS(diffuw),ABS(diffdw)& |
137: ,one_by_six*(sigma3*ABS(diffuw)+sigma4*ABS(diffdw))) |
138: ELSE |
139: limiter=0.0 |
140: ENDIF |
141: mass_flux_x(j,k)=vol_flux_x(j,k)*(density1(donor,k)+limiter) |
142: |
143: sigmam=ABS(mass_flux_x(j,k))/(density1(donor,k)*pre_vol(donor,k)) |
144: diffuw=energy1(donor,k)-energy1(upwind,k) |
145: diffdw=energy1(downwind,k)-energy1(donor,k) |
146: wind=1.0_8 |
147: IF(diffdw.LE.0.0) wind=-1.0_8 |
148: IF(diffuw*diffdw.GT.0.0)THEN |
149: limiter=(1.0_8-sigmam)*wind*MIN(ABS(diffuw),ABS(diffdw)& |
150: ,one_by_six*(sigma3*ABS(diffuw)+sigma4*ABS(diffdw))) |
151: ELSE |
152: limiter=0.0 |
153: ENDIF |
154: |
155: ener_flux(j,k)=mass_flux_x(j,k)*(energy1(donor,k)+limiter) |
156: |
157: ENDDO |
Path / |
Metric | Value |
---|---|
CQA speedup if no scalar integer | 1.22 |
CQA speedup if FP arith vectorized | 1.37 |
CQA speedup if fully vectorized | 3.57 |
CQA speedup if no inter-iteration dependency | NA |
CQA speedup if next bottleneck killed | 1.73 |
Bottlenecks | |
Function | advec_cell_kernel_.DIR.OMP.PARALLEL.2 |
Source | advec_cell_kernel.f90:83-83,advec_cell_kernel.f90:109-109,advec_cell_kernel.f90:112-112,advec_cell_kernel.f90:118-118,advec_cell_kernel.f90:124-126,advec_cell_kernel.f90:131-132,advec_cell_kernel.f90:135-137,advec_cell_kernel.f90:141-145,advec_cell_kernel.f90:148-150,advec_cell_kernel.f90:155-157 |
Source loop unroll info | NA |
Source loop unroll confidence level | NA |
Unroll/vectorization loop type | NA |
Unroll factor | NA |
CQA cycles | 45.17 |
CQA cycles if no scalar integer | 37.17 |
CQA cycles if FP arith vectorized | 32.90 |
CQA cycles if fully vectorized | 12.66 |
Front-end cycles | 45.17 |
DIV/SQRT cycles | 4.50 |
P0 cycles | 6.00 |
P1 cycles | 4.42 |
P2 cycles | 4.25 |
P3 cycles | 1.50 |
P4 cycles | 13.44 |
P5 cycles | 13.44 |
P6 cycles | 13.44 |
P7 cycles | 22.33 |
P8 cycles | 23.39 |
P9 cycles | 22.33 |
P10 cycles | 22.28 |
P11 cycles | 18.50 |
P12 cycles | 18.50 |
P13 cycles | 10.00 |
Inter-iter dependencies cycles | 1 |
FE+BE cycles (UFS) | NA |
Stall cycles (UFS) | NA |
Nb insns | 145.00 |
Nb uops | 271.00 |
Nb loads | 37.33 |
Nb stores | 9.00 |
Nb stack references | 30.00 |
FLOP/cycle | 1.71 |
Nb FLOP add-sub | 18.67 |
Nb FLOP mul | 24.00 |
Nb FLOP fma | 13.33 |
Nb FLOP div | 8.00 |
Nb FLOP rcp | 0.00 |
Nb FLOP sqrt | 0.00 |
Nb FLOP rsqrt | 0.00 |
Bytes/cycle | 24.75 |
Bytes prefetched | 0.00 |
Bytes loaded | 656.00 |
Bytes stored | 192.00 |
Stride 0 | 1.67 |
Stride 1 | 0.00 |
Stride n | 0.00 |
Stride unknown | 10.67 |
Stride indirect | 0.67 |
Vectorization ratio all | 66.20 |
Vectorization ratio load | 62.99 |
Vectorization ratio store | 56.11 |
Vectorization ratio mul | 58.33 |
Vectorization ratio add_sub | 60.00 |
Vectorization ratio fma | 100.00 |
Vectorization ratio div_sqrt | 100.00 |
Vectorization ratio other | 70.24 |
Vector-efficiency ratio all | 34.81 |
Vector-efficiency ratio load | 33.63 |
Vector-efficiency ratio store | 33.54 |
Vector-efficiency ratio mul | 34.38 |
Vector-efficiency ratio add_sub | 33.33 |
Vector-efficiency ratio fma | 50.00 |
Vector-efficiency ratio div_sqrt | 50.00 |
Vector-efficiency ratio other | 34.80 |
Metric | Value |
---|---|
CQA speedup if no scalar integer | 1.13 |
CQA speedup if FP arith vectorized | 1.45 |
CQA speedup if fully vectorized | 3.46 |
CQA speedup if no inter-iteration dependency | NA |
CQA speedup if next bottleneck killed | 1.75 |
Bottlenecks | micro-operation queue, |
Function | advec_cell_kernel_.DIR.OMP.PARALLEL.2 |
Source | advec_cell_kernel.f90:83-83,advec_cell_kernel.f90:109-109,advec_cell_kernel.f90:112-112,advec_cell_kernel.f90:118-118,advec_cell_kernel.f90:124-126,advec_cell_kernel.f90:131-132,advec_cell_kernel.f90:135-137,advec_cell_kernel.f90:141-145,advec_cell_kernel.f90:148-150,advec_cell_kernel.f90:155-157 |
Source loop unroll info | NA |
Source loop unroll confidence level | NA |
Unroll/vectorization loop type | NA |
Unroll factor | NA |
CQA cycles | 58.67 |
CQA cycles if no scalar integer | 52.00 |
CQA cycles if FP arith vectorized | 40.53 |
CQA cycles if fully vectorized | 16.97 |
Front-end cycles | 58.67 |
DIV/SQRT cycles | 4.50 |
P0 cycles | 6.00 |
P1 cycles | 4.25 |
P2 cycles | 4.25 |
P3 cycles | 1.00 |
P4 cycles | 10.33 |
P5 cycles | 10.33 |
P6 cycles | 10.33 |
P7 cycles | 33.50 |
P8 cycles | 33.58 |
P9 cycles | 33.50 |
P10 cycles | 33.42 |
P11 cycles | 25.50 |
P12 cycles | 25.50 |
P13 cycles | 15.00 |
Inter-iter dependencies cycles | 1 |
FE+BE cycles (UFS) | NA |
Stall cycles (UFS) | NA |
Nb insns | 165.00 |
Nb uops | 352.00 |
Nb loads | 35.00 |
Nb stores | 5.00 |
Nb stack references | 23.00 |
FLOP/cycle | 1.98 |
Nb FLOP add-sub | 28.00 |
Nb FLOP mul | 36.00 |
Nb FLOP fma | 20.00 |
Nb FLOP div | 12.00 |
Nb FLOP rcp | 0.00 |
Nb FLOP sqrt | 0.00 |
Nb FLOP rsqrt | 0.00 |
Bytes/cycle | 14.18 |
Bytes prefetched | 0.00 |
Bytes loaded | 720.00 |
Bytes stored | 112.00 |
Stride 0 | 2.00 |
Stride 1 | 0.00 |
Stride n | 0.00 |
Stride unknown | 8.00 |
Stride indirect | 1.00 |
Vectorization ratio all | 91.41 |
Vectorization ratio load | 95.45 |
Vectorization ratio store | 60.00 |
Vectorization ratio mul | 100.00 |
Vectorization ratio add_sub | 90.00 |
Vectorization ratio fma | 100.00 |
Vectorization ratio div_sqrt | 100.00 |
Vectorization ratio other | 91.67 |
Vector-efficiency ratio all | 42.63 |
Vector-efficiency ratio load | 43.75 |
Vector-efficiency ratio store | 35.00 |
Vector-efficiency ratio mul | 50.00 |
Vector-efficiency ratio add_sub | 43.75 |
Vector-efficiency ratio fma | 50.00 |
Vector-efficiency ratio div_sqrt | 50.00 |
Vector-efficiency ratio other | 40.54 |
Metric | Value |
---|---|
CQA speedup if no scalar integer | 2.63 |
CQA speedup if FP arith vectorized | 1.00 |
CQA speedup if fully vectorized | 2.93 |
CQA speedup if no inter-iteration dependency | NA |
CQA speedup if next bottleneck killed | 1.04 |
Bottlenecks | micro-operation queue, |
Function | advec_cell_kernel_.DIR.OMP.PARALLEL.2 |
Source | advec_cell_kernel.f90:83-83,advec_cell_kernel.f90:109-109,advec_cell_kernel.f90:112-112,advec_cell_kernel.f90:118-118,advec_cell_kernel.f90:124-126,advec_cell_kernel.f90:131-132,advec_cell_kernel.f90:135-137,advec_cell_kernel.f90:141-145,advec_cell_kernel.f90:148-150,advec_cell_kernel.f90:155-157 |
Source loop unroll info | NA |
Source loop unroll confidence level | NA |
Unroll/vectorization loop type | NA |
Unroll factor | NA |
CQA cycles | 11.83 |
CQA cycles if no scalar integer | 4.50 |
CQA cycles if FP arith vectorized | 11.83 |
CQA cycles if fully vectorized | 4.04 |
Front-end cycles | 11.83 |
DIV/SQRT cycles | 3.75 |
P0 cycles | 6.00 |
P1 cycles | 3.75 |
P2 cycles | 3.50 |
P3 cycles | 2.00 |
P4 cycles | 11.33 |
P5 cycles | 11.33 |
P6 cycles | 11.33 |
P7 cycles | 0.00 |
P8 cycles | 3.00 |
P9 cycles | 0.00 |
P10 cycles | 0.00 |
P11 cycles | 2.50 |
P12 cycles | 2.50 |
P13 cycles | 0.00 |
Inter-iter dependencies cycles | 1 |
FE+BE cycles (UFS) | NA |
Stall cycles (UFS) | NA |
Nb insns | 68.00 |
Nb uops | 71.00 |
Nb loads | 24.00 |
Nb stores | 10.00 |
Nb stack references | 27.00 |
FLOP/cycle | 0.00 |
Nb FLOP add-sub | 0.00 |
Nb FLOP mul | 0.00 |
Nb FLOP fma | 0.00 |
Nb FLOP div | 0.00 |
Nb FLOP rcp | 0.00 |
Nb FLOP sqrt | 0.00 |
Nb FLOP rsqrt | 0.00 |
Bytes/cycle | 41.24 |
Bytes prefetched | 0.00 |
Bytes loaded | 288.00 |
Bytes stored | 200.00 |
Stride 0 | 1.00 |
Stride 1 | 0.00 |
Stride n | 0.00 |
Stride unknown | 8.00 |
Stride indirect | 0.00 |
Vectorization ratio all | 26.32 |
Vectorization ratio load | 20.00 |
Vectorization ratio store | 50.00 |
Vectorization ratio mul | 0.00 |
Vectorization ratio add_sub | 0.00 |
Vectorization ratio fma | NA |
Vectorization ratio div_sqrt | NA |
Vectorization ratio other | 33.33 |
Vector-efficiency ratio all | 22.26 |
Vector-efficiency ratio load | 20.00 |
Vector-efficiency ratio store | 31.25 |
Vector-efficiency ratio mul | 12.50 |
Vector-efficiency ratio add_sub | 12.50 |
Vector-efficiency ratio fma | NA |
Vector-efficiency ratio div_sqrt | NA |
Vector-efficiency ratio other | 24.65 |
Metric | Value |
---|---|
CQA speedup if no scalar integer | 1.18 |
CQA speedup if FP arith vectorized | 1.40 |
CQA speedup if fully vectorized | 3.83 |
CQA speedup if no inter-iteration dependency | NA |
CQA speedup if next bottleneck killed | 1.94 |
Bottlenecks | micro-operation queue, |
Function | advec_cell_kernel_.DIR.OMP.PARALLEL.2 |
Source | advec_cell_kernel.f90:83-83,advec_cell_kernel.f90:109-109,advec_cell_kernel.f90:112-112,advec_cell_kernel.f90:118-118,advec_cell_kernel.f90:124-126,advec_cell_kernel.f90:131-132,advec_cell_kernel.f90:135-137,advec_cell_kernel.f90:141-145,advec_cell_kernel.f90:148-150,advec_cell_kernel.f90:155-157 |
Source loop unroll info | NA |
Source loop unroll confidence level | NA |
Unroll/vectorization loop type | NA |
Unroll factor | NA |
CQA cycles | 65.00 |
CQA cycles if no scalar integer | 55.00 |
CQA cycles if FP arith vectorized | 46.34 |
CQA cycles if fully vectorized | 16.97 |
Front-end cycles | 65.00 |
DIV/SQRT cycles | 5.25 |
P0 cycles | 6.00 |
P1 cycles | 5.25 |
P2 cycles | 5.00 |
P3 cycles | 1.50 |
P4 cycles | 18.67 |
P5 cycles | 18.67 |
P6 cycles | 18.67 |
P7 cycles | 33.50 |
P8 cycles | 33.58 |
P9 cycles | 33.50 |
P10 cycles | 33.42 |
P11 cycles | 27.50 |
P12 cycles | 27.50 |
P13 cycles | 15.00 |
Inter-iter dependencies cycles | 1 |
FE+BE cycles (UFS) | NA |
Stall cycles (UFS) | NA |
Nb insns | 202.00 |
Nb uops | 390.00 |
Nb loads | 53.00 |
Nb stores | 12.00 |
Nb stack references | 40.00 |
FLOP/cycle | 1.78 |
Nb FLOP add-sub | 28.00 |
Nb FLOP mul | 36.00 |
Nb FLOP fma | 20.00 |
Nb FLOP div | 12.00 |
Nb FLOP rcp | 0.00 |
Nb FLOP sqrt | 0.00 |
Nb FLOP rsqrt | 0.00 |
Bytes/cycle | 18.83 |
Bytes prefetched | 0.00 |
Bytes loaded | 960.00 |
Bytes stored | 264.00 |
Stride 0 | 2.00 |
Stride 1 | 0.00 |
Stride n | 0.00 |
Stride unknown | 16.00 |
Stride indirect | 1.00 |
Vectorization ratio all | 80.86 |
Vectorization ratio load | 73.53 |
Vectorization ratio store | 58.33 |
Vectorization ratio mul | 75.00 |
Vectorization ratio add_sub | 90.00 |
Vectorization ratio fma | 100.00 |
Vectorization ratio div_sqrt | 100.00 |
Vectorization ratio other | 85.71 |
Vector-efficiency ratio all | 39.54 |
Vector-efficiency ratio load | 37.13 |
Vector-efficiency ratio store | 34.38 |
Vector-efficiency ratio mul | 40.63 |
Vector-efficiency ratio add_sub | 43.75 |
Vector-efficiency ratio fma | 50.00 |
Vector-efficiency ratio div_sqrt | 50.00 |
Vector-efficiency ratio other | 39.21 |
Path / |
Function | advec_cell_kernel_.DIR.OMP.PARALLEL.2 |
Source file and lines | advec_cell_kernel.f90:83-157 |
Module | exec |
nb instructions | 145 |
nb uops | 271 |
loop length | 852.67 |
used x86 registers | 14.67 |
used mmx registers | 0 |
used xmm registers | 5.33 |
used ymm registers | 22 |
used zmm registers | 0 |
nb stack references | 30 |
ADD-SUB / MUL ratio | 0.78 |
micro-operation queue | 45.17 cycles |
front end | 45.17 cycles |
ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 4.50 | 6.00 | 4.42 | 4.25 | 1.50 | 13.44 | 13.44 | 13.44 | 22.33 | 23.39 | 22.33 | 22.28 | 18.50 | 18.50 |
cycles | 4.50 | 6.00 | 4.42 | 4.25 | 1.50 | 13.44 | 13.44 | 13.44 | 22.33 | 23.39 | 22.33 | 22.28 | 18.50 | 18.50 |
Cycles executing div or sqrt instructions | 10.00 |
Longest recurrence chain latency (RecMII) | 1.00 |
Front-end | 45.17 |
Dispatch | 26.17 |
DIV/SQRT | 10.00 |
Data deps. | 1.00 |
Overall L1 | 45.17 |
all | 46% |
load | 48% |
store | 22% |
mul | 0% |
add-sub | 57% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 45% |
all | 100% |
load | 100% |
store | 100% |
mul | 100% |
add-sub | 100% |
fma | 100% |
div/sqrt | 100% |
other | 100% |
all | 66% |
load | 62% |
store | 56% |
mul | 58% |
add-sub | 60% |
fma | 100% |
div/sqrt | 100% |
other | 70% |
all | 25% |
load | 26% |
store | 20% |
mul | 12% |
add-sub | 32% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 21% |
all | 48% |
load | 50% |
store | 50% |
mul | 50% |
add-sub | 50% |
fma | 50% |
div/sqrt | 50% |
other | 48% |
all | 34% |
load | 33% |
store | 33% |
mul | 34% |
add-sub | 33% |
fma | 50% |
div/sqrt | 50% |
other | 34% |
Function | advec_cell_kernel_.DIR.OMP.PARALLEL.2 |
Source file and lines | advec_cell_kernel.f90:83-157 |
Module | exec |
nb instructions | 165 |
nb uops | 352 |
loop length | 966 |
used x86 registers | 14 |
used mmx registers | 0 |
used xmm registers | 8 |
used ymm registers | 28 |
used zmm registers | 0 |
nb stack references | 23 |
ADD-SUB / MUL ratio | 0.78 |
micro-operation queue | 58.67 cycles |
front end | 58.67 cycles |
ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 4.50 | 6.00 | 4.25 | 4.25 | 1.00 | 10.33 | 10.33 | 10.33 | 33.50 | 33.58 | 33.50 | 33.42 | 25.50 | 25.50 |
cycles | 4.50 | 6.00 | 4.25 | 4.25 | 1.00 | 10.33 | 10.33 | 10.33 | 33.50 | 33.58 | 33.50 | 33.42 | 25.50 | 25.50 |
Cycles executing div or sqrt instructions | 15.00 |
Longest recurrence chain latency (RecMII) | 1.00 |
Front-end | 58.67 |
Dispatch | 33.58 |
DIV/SQRT | 15.00 |
Data deps. | 1.00 |
Overall L1 | 58.67 |
all | 78% |
load | 91% |
store | 33% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 86% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 75% |
all | 100% |
load | 100% |
store | 100% |
mul | 100% |
add-sub | 100% |
fma | 100% |
div/sqrt | 100% |
other | 100% |
all | 91% |
load | 95% |
store | 60% |
mul | 100% |
add-sub | 90% |
fma | 100% |
div/sqrt | 100% |
other | 91% |
all | 34% |
load | 38% |
store | 25% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 41% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 27% |
all | 48% |
load | 50% |
store | 50% |
mul | 50% |
add-sub | 50% |
fma | 50% |
div/sqrt | 50% |
other | 46% |
all | 42% |
load | 43% |
store | 35% |
mul | 50% |
add-sub | 43% |
fma | 50% |
div/sqrt | 50% |
other | 40% |
Instruction | Nb FU | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
MOV 0x1a0(%RSP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
LEA (%RAX,%R11,1),%R14 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SUB 0x18(%RSP),%R14 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
MOV %R15,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
IMUL %R14,%RAX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV %RAX,0x280(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV 0x30(%RSP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
IMUL %R14,%RAX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ %RAX,%YMM13 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 1 |
MOV 0x8(%RSP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
IMUL %R14,%RAX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ %RAX,%YMM1 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 1 |
MOV %R13,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
IMUL %R14,%RAX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV %RAX,0x28(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV (%RSP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
IMUL %R14,%RAX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ %RAX,%YMM2 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 1 |
VMOVDQA %YMM2,0x2a0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 |
IMUL %R12,%R14 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VPADDQ 0x360(%RSP),%YMM1,%YMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 |
VPADDQ 0x3a0(%RSP),%YMM13,%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 |
VPBROADCASTQ %RAX,%YMM3 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 1 |
VMOVDQA 0x3c0(%RSP),%YMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VPSUBQ %YMM3,%YMM5,%YMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
VPCMPNLEUQ 0xcf839(%RIP),%YMM13,%K1 | |||||||||||||||||
MOV 0x280(%RSP),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
ADD 0x140(%RSP),%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
MOV 0x260(%RSP),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
LEA (%RAX,%RSI,1),%RBX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SUB 0x60(%RSP),%RBX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VMOVUPD (%RCX,%RBX,8),%YMM13{%K1}{z} | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVAPD %YMM13,%YMM31{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VCMPPD $0x1,%YMM31,%YMM12,%K2{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VPBROADCASTD %EAX,%XMM13 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 1 |
VPOR 0xd0d66(%RIP),%XMM13,%XMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 |
VPADDD 0x200(%RSP),%XMM13,%XMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 |
VPMINSD %XMM0,%XMM5,%XMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
VPADDQ %YMM3,%YMM4,%YMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
VPMOVQD %YMM3,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 4 | 0.50 |
VPADDD 0x2f0(%RSP),%XMM13,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 |
VPBLENDMD %XMM3,%XMM7,%XMM8{%K2} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
VMOVDQA %XMM5,%XMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPADDD 0x2e0(%RSP),%XMM13,%XMM9{%K2} | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 |
VMOVDQA32 %XMM7,%XMM5{%K2} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVDQA32 %XMM7,%XMM3{%K2} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VANDPD %YMM14,%YMM31,%YMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
VPMOVSXDQ %XMM3,%YMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 2 | 0.50 |
VPSUBQ %YMM6,%YMM3,%YMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
VPSLLQ $0x3,%YMM3,%YMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VPADDQ %YMM3,%YMM1,%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
KMOVQ %K1,%K2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 1 | 1 |
VPXOR %XMM13,%XMM13,%XMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VGATHERQPD (,%YMM1,1),%YMM13{%K2} | 24 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.75 | 1.42 | 1.42 | 1.42 | 3 | 3 | 0-16 | 4 |
VMOVAPD %YMM13,%YMM30{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x98(%RSP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
VMOVUPD (%RAX,%RBX,8),%YMM1{%K1}{z} | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VPMOVSXDQ %XMM5,%YMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 2 | 0.50 |
VPSUBQ %YMM6,%YMM5,%YMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
VPSLLQ $0x3,%YMM5,%YMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VPADDQ 0x380(%RSP),%YMM5,%YMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 |
KMOVQ %K1,%K2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 1 | 1 |
VXORPD %XMM13,%XMM13,%XMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VGATHERQPD (,%YMM5,1),%YMM13{%K2} | 24 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.75 | 1.42 | 1.42 | 1.42 | 3 | 3 | 0-16 | 4 |
VDIVPD %YMM30,%YMM7,%YMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 13 | 5 |
VMOVAPD %YMM1,%YMM29{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVAPD %YMM29,%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VFMADD213PD %YMM29,%YMM5,%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD %YMM13,%YMM28{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VDIVPD %YMM28,%YMM1,%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 13 | 5 |
VPADDQ %YMM3,%YMM2,%YMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
KMOVQ %K1,%K2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 1 | 1 |
VXORPD %XMM10,%XMM10,%XMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VGATHERQPD (,%YMM7,1),%YMM10{%K2} | 24 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.75 | 1.42 | 1.42 | 1.42 | 3 | 3 | 0-16 | 4 |
VSUBPD %YMM5,%YMM16,%YMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 |
VMOVAPD %YMM10,%YMM27{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPMOVSXDQ %XMM9,%YMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 2 | 0.50 |
VPSUBQ %YMM6,%YMM7,%YMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
VPSLLQ $0x3,%YMM7,%YMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VPADDQ %YMM7,%YMM2,%YMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
KMOVQ %K1,%K2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 1 | 1 |
VXORPD %XMM10,%XMM10,%XMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VGATHERQPD (,%YMM9,1),%YMM10{%K2} | 24 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.75 | 1.42 | 1.42 | 1.42 | 3 | 3 | 0-16 | 4 |
VMOVAPD %YMM10,%YMM26{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VSUBPD %YMM26,%YMM27,%YMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 |
VPMOVSXDQ %XMM8,%YMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 2 | 0.50 |
VPSUBQ %YMM6,%YMM8,%YMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
VPSLLQ $0x3,%YMM8,%YMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VPADDQ %YMM2,%YMM8,%YMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
KMOVQ %K1,%K2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 1 | 1 |
VXORPD %XMM10,%XMM10,%XMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VGATHERQPD (,%YMM2,1),%YMM10{%K2} | 24 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.75 | 1.42 | 1.42 | 1.42 | 3 | 3 | 0-16 | 4 |
VMOVAPD %YMM10,%YMM11{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VSUBPD %YMM27,%YMM11,%YMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 |
VMULPD %YMM2,%YMM9,%YMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VCMPPD $0x1,%YMM10,%YMM12,%K2{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VANDPD %YMM14,%YMM9,%YMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
VANDPD %YMM2,%YMM14,%YMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
VMULPD %YMM1,%YMM9,%YMM22 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VFMADD231PD %YMM13,%YMM10,%YMM22 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM18,%YMM22,%YMM22 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VCMPPD $0x2,%YMM22,%YMM10,%K3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD %YMM10,%YMM22{%K3} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VCMPPD $0x2,%YMM12,%YMM2,%K3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VSUBPD %YMM5,%YMM15,%YMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 |
VXORPD %YMM17,%YMM2,%YMM2{%K3} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
VCMPPD $0x2,%YMM22,%YMM9,%K3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD %YMM9,%YMM22{%K3} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVAPD %YMM27,%YMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VFMADD231PD %YMM2,%YMM22,%YMM5{%K2} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVDQA 0x2a0(%RSP),%YMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VPADDQ 0x340(%RSP),%YMM2,%YMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 |
VMULPD %YMM31,%YMM5,%YMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 |
MOV 0x28(%RSP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
ADD 0x88(%RSP),%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VMOVUPD %YMM5,(%RAX,%RBX,8){%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 |
VPADDQ %YMM3,%YMM2,%YMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
KMOVQ %K1,%K2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 1 | 1 |
VXORPD %XMM9,%XMM9,%XMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VGATHERQPD (,%YMM3,1),%YMM9{%K2} | 24 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.75 | 1.42 | 1.42 | 1.42 | 3 | 3 | 0-16 | 4 |
VPADDQ %YMM7,%YMM2,%YMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
KMOVQ %K1,%K2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 1 | 1 |
VPXOR %XMM7,%XMM7,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VGATHERQPD (,%YMM3,1),%YMM7{%K2} | 24 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.75 | 1.42 | 1.42 | 1.42 | 3 | 3 | 0-16 | 4 |
VMOVAPD %YMM9,%YMM24{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVAPD %YMM7,%YMM25{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VSUBPD %YMM25,%YMM24,%YMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 |
VPADDQ %YMM2,%YMM8,%YMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
KMOVQ %K1,%K2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 1 | 1 |
VXORPD %XMM7,%XMM7,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VGATHERQPD (,%YMM2,1),%YMM7{%K2} | 24 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.75 | 1.42 | 1.42 | 1.42 | 3 | 3 | 0-16 | 4 |
VMOVAPD %YMM7,%YMM23{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VSUBPD %YMM24,%YMM23,%YMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 |
VMULPD %YMM3,%YMM2,%YMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VCMPPD $0x1,%YMM7,%YMM12,%K2{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VANDPD %YMM5,%YMM14,%YMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
VMULPD %YMM30,%YMM27,%YMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VDIVPD %YMM8,%YMM7,%YMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 13 | 5 |
VANDPD %YMM3,%YMM14,%YMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
VANDPD %YMM2,%YMM14,%YMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
VMULPD %YMM1,%YMM3,%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VFMADD231PD %YMM13,%YMM8,%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM18,%YMM1,%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VCMPPD $0x2,%YMM1,%YMM8,%K3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD %YMM8,%YMM1{%K3} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VCMPPD $0x2,%YMM12,%YMM2,%K3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VSUBPD %YMM7,%YMM15,%YMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 |
VXORPD %YMM17,%YMM2,%YMM2{%K3} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
VCMPPD $0x2,%YMM1,%YMM3,%K3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD %YMM3,%YMM1{%K3} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVAPD %YMM24,%YMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VFMADD231PD %YMM2,%YMM1,%YMM3{%K2} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM5,%YMM3,%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 |
ADD 0xa0(%RSP),%R14 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VMOVUPD %YMM1,(%R14,%RBX,8){%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 |
LEA 0x1(%R11),%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD %R12,%RDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD %R13,%R9 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD %R15,%R10 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CMP 0x20(%RSP),%R11 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
MOV %RAX,%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JE 4263e4 <advec_cell_kernel_module_mp_advec_cell_kernel_.DIR.OMP.PARALLEL.2+0x714> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
TEST %R8,%R8 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JE 428870 <advec_cell_kernel_module_mp_advec_cell_kernel_.DIR.OMP.PARALLEL.2+0x2ba0> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
Function | advec_cell_kernel_.DIR.OMP.PARALLEL.2 |
Source file and lines | advec_cell_kernel.f90:83-157 |
Module | exec |
nb instructions | 68 |
nb uops | 71 |
loop length | 385 |
used x86 registers | 15 |
used mmx registers | 0 |
used xmm registers | 0 |
used ymm registers | 10 |
used zmm registers | 0 |
nb stack references | 27 |
micro-operation queue | 11.83 cycles |
front end | 11.83 cycles |
ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 3.75 | 6.00 | 3.75 | 3.50 | 2.00 | 11.33 | 11.33 | 11.33 | 0.00 | 3.00 | 0.00 | 0.00 | 2.50 | 2.50 |
cycles | 3.75 | 6.00 | 3.75 | 3.50 | 2.00 | 11.33 | 11.33 | 11.33 | 0.00 | 3.00 | 0.00 | 0.00 | 2.50 | 2.50 |
Cycles executing div or sqrt instructions | NA |
Longest recurrence chain latency (RecMII) | 1.00 |
Front-end | 11.83 |
Dispatch | 11.33 |
Data deps. | 1.00 |
Overall L1 | 11.83 |
all | 2% |
load | 0% |
store | 16% |
mul | 0% |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 0% |
all | 100% |
load | 100% |
store | 100% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 100% |
all | 26% |
load | 20% |
store | 50% |
mul | 0% |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 33% |
all | 13% |
load | 12% |
store | 18% |
mul | 12% |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 11% |
all | 50% |
load | 50% |
store | 50% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 50% |
all | 22% |
load | 20% |
store | 31% |
mul | 12% |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 24% |
Instruction | Nb FU | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
LEA 0x1(%R11),%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD %R12,%RDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD %R13,%R9 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD %R15,%R10 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CMP 0x20(%RSP),%R11 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
MOV %RAX,%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JE 4263e4 <advec_cell_kernel_module_mp_advec_cell_kernel_.DIR.OMP.PARALLEL.2+0x714> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
TEST %R8,%R8 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JE 428870 <advec_cell_kernel_module_mp_advec_cell_kernel_.DIR.OMP.PARALLEL.2+0x2ba0> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
VMOVAPS %YMM30,0x160(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 |
VMOVAPS %YMM29,0x1c0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 |
VMOVAPS %YMM28,0x1e0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 |
VMOVAPS %YMM26,0x100(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 |
VMOVAPD %YMM11,%YMM30 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVAPD %YMM25,%YMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVAPD %YMM23,%YMM29 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x1a0(%RSP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %R11,0x180(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
ADD %R11,%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SUB 0x18(%RSP),%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
IMUL %RAX,%R15 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV %R15,0x280(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV 0x30(%RSP),%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
IMUL %RAX,%RBX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ %RBX,%YMM13 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 1 |
MOV 0x8(%RSP),%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
IMUL %RAX,%R15 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ %R15,%YMM1 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 1 |
IMUL %RAX,%R13 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV %R13,0x28(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV (%RSP),%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
IMUL %RAX,%R14 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ %R14,%YMM2 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 1 |
VMOVDQA %YMM2,0x2a0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 |
IMUL %R12,%RAX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV %RAX,0x38(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV 0xb8(%RSP),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %R10,0x58(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV 0xb0(%RSP),%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x48(%RSP),%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x98(%RSP),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %R9,%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RDI,%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RDX,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x310(%RSP),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x318(%RSP),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x308(%RSP),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %R8,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP %R8,0x328(%RSP) | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
MOV 0x338(%RSP),%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x330(%RSP),%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x320(%RSP),%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %RDI,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R9,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R11,%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x58(%RSP),%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x180(%RSP),%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
VMOVAPD %YMM29,%YMM23 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVAPD %YMM11,%YMM25 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVAPD %YMM30,%YMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVAPD 0x100(%RSP),%YMM26 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVAPD 0x1e0(%RSP),%YMM28 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVAPD 0x1c0(%RSP),%YMM29 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVAPD 0x160(%RSP),%YMM30 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
MOV 0x38(%RSP),%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
JNE 4288d7 <advec_cell_kernel_module_mp_advec_cell_kernel_.DIR.OMP.PARALLEL.2+0x2c07> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
JMP 428c12 <advec_cell_kernel_module_mp_advec_cell_kernel_.DIR.OMP.PARALLEL.2+0x2f42> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
Function | advec_cell_kernel_.DIR.OMP.PARALLEL.2 |
Source file and lines | advec_cell_kernel.f90:83-157 |
Module | exec |
nb instructions | 202 |
nb uops | 390 |
loop length | 1207 |
used x86 registers | 15 |
used mmx registers | 0 |
used xmm registers | 8 |
used ymm registers | 28 |
used zmm registers | 0 |
nb stack references | 40 |
ADD-SUB / MUL ratio | 0.78 |
micro-operation queue | 65.00 cycles |
front end | 65.00 cycles |
ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 5.25 | 6.00 | 5.25 | 5.00 | 1.50 | 18.67 | 18.67 | 18.67 | 33.50 | 33.58 | 33.50 | 33.42 | 27.50 | 27.50 |
cycles | 5.25 | 6.00 | 5.25 | 5.00 | 1.50 | 18.67 | 18.67 | 18.67 | 33.50 | 33.58 | 33.50 | 33.42 | 27.50 | 27.50 |
Cycles executing div or sqrt instructions | 15.00 |
Longest recurrence chain latency (RecMII) | 1.00 |
Front-end | 65.00 |
Dispatch | 33.58 |
DIV/SQRT | 15.00 |
Data deps. | 1.00 |
Overall L1 | 65.00 |
all | 56% |
load | 55% |
store | 16% |
mul | 0% |
add-sub | 86% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 60% |
all | 100% |
load | 100% |
store | 100% |
mul | 100% |
add-sub | 100% |
fma | 100% |
div/sqrt | 100% |
other | 100% |
all | 80% |
load | 73% |
store | 58% |
mul | 75% |
add-sub | 90% |
fma | 100% |
div/sqrt | 100% |
other | 85% |
all | 28% |
load | 28% |
store | 18% |
mul | 12% |
add-sub | 41% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 24% |
all | 48% |
load | 50% |
store | 50% |
mul | 50% |
add-sub | 50% |
fma | 50% |
div/sqrt | 50% |
other | 47% |
all | 39% |
load | 37% |
store | 34% |
mul | 40% |
add-sub | 43% |
fma | 50% |
div/sqrt | 50% |
other | 39% |
Instruction | Nb FU | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
VPADDQ 0x360(%RSP),%YMM1,%YMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 |
VPADDQ 0x3a0(%RSP),%YMM13,%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 |
VPBROADCASTQ %RAX,%YMM3 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 1 |
VMOVDQA 0x3c0(%RSP),%YMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VPSUBQ %YMM3,%YMM5,%YMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
VPCMPNLEUQ 0xcf839(%RIP),%YMM13,%K1 | |||||||||||||||||
MOV 0x280(%RSP),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
ADD 0x140(%RSP),%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
MOV 0x260(%RSP),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
LEA (%RAX,%RSI,1),%RBX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SUB 0x60(%RSP),%RBX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VMOVUPD (%RCX,%RBX,8),%YMM13{%K1}{z} | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVAPD %YMM13,%YMM31{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VCMPPD $0x1,%YMM31,%YMM12,%K2{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VPBROADCASTD %EAX,%XMM13 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 1 |
VPOR 0xd0d66(%RIP),%XMM13,%XMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 |
VPADDD 0x200(%RSP),%XMM13,%XMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 |
VPMINSD %XMM0,%XMM5,%XMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
VPADDQ %YMM3,%YMM4,%YMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
VPMOVQD %YMM3,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 4 | 0.50 |
VPADDD 0x2f0(%RSP),%XMM13,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 |
VPBLENDMD %XMM3,%XMM7,%XMM8{%K2} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
VMOVDQA %XMM5,%XMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPADDD 0x2e0(%RSP),%XMM13,%XMM9{%K2} | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 |
VMOVDQA32 %XMM7,%XMM5{%K2} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVDQA32 %XMM7,%XMM3{%K2} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VANDPD %YMM14,%YMM31,%YMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
VPMOVSXDQ %XMM3,%YMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 2 | 0.50 |
VPSUBQ %YMM6,%YMM3,%YMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
VPSLLQ $0x3,%YMM3,%YMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VPADDQ %YMM3,%YMM1,%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
KMOVQ %K1,%K2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 1 | 1 |
VPXOR %XMM13,%XMM13,%XMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VGATHERQPD (,%YMM1,1),%YMM13{%K2} | 24 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.75 | 1.42 | 1.42 | 1.42 | 3 | 3 | 0-16 | 4 |
VMOVAPD %YMM13,%YMM30{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x98(%RSP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
VMOVUPD (%RAX,%RBX,8),%YMM1{%K1}{z} | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VPMOVSXDQ %XMM5,%YMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 2 | 0.50 |
VPSUBQ %YMM6,%YMM5,%YMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
VPSLLQ $0x3,%YMM5,%YMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VPADDQ 0x380(%RSP),%YMM5,%YMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 |
KMOVQ %K1,%K2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 1 | 1 |
VXORPD %XMM13,%XMM13,%XMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VGATHERQPD (,%YMM5,1),%YMM13{%K2} | 24 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.75 | 1.42 | 1.42 | 1.42 | 3 | 3 | 0-16 | 4 |
VDIVPD %YMM30,%YMM7,%YMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 13 | 5 |
VMOVAPD %YMM1,%YMM29{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVAPD %YMM29,%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VFMADD213PD %YMM29,%YMM5,%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD %YMM13,%YMM28{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VDIVPD %YMM28,%YMM1,%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 13 | 5 |
VPADDQ %YMM3,%YMM2,%YMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
KMOVQ %K1,%K2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 1 | 1 |
VXORPD %XMM10,%XMM10,%XMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VGATHERQPD (,%YMM7,1),%YMM10{%K2} | 24 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.75 | 1.42 | 1.42 | 1.42 | 3 | 3 | 0-16 | 4 |
VSUBPD %YMM5,%YMM16,%YMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 |
VMOVAPD %YMM10,%YMM27{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPMOVSXDQ %XMM9,%YMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 2 | 0.50 |
VPSUBQ %YMM6,%YMM7,%YMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
VPSLLQ $0x3,%YMM7,%YMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VPADDQ %YMM7,%YMM2,%YMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
KMOVQ %K1,%K2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 1 | 1 |
VXORPD %XMM10,%XMM10,%XMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VGATHERQPD (,%YMM9,1),%YMM10{%K2} | 24 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.75 | 1.42 | 1.42 | 1.42 | 3 | 3 | 0-16 | 4 |
VMOVAPD %YMM10,%YMM26{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VSUBPD %YMM26,%YMM27,%YMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 |
VPMOVSXDQ %XMM8,%YMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 2 | 0.50 |
VPSUBQ %YMM6,%YMM8,%YMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
VPSLLQ $0x3,%YMM8,%YMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VPADDQ %YMM2,%YMM8,%YMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
KMOVQ %K1,%K2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 1 | 1 |
VXORPD %XMM10,%XMM10,%XMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VGATHERQPD (,%YMM2,1),%YMM10{%K2} | 24 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.75 | 1.42 | 1.42 | 1.42 | 3 | 3 | 0-16 | 4 |
VMOVAPD %YMM10,%YMM11{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VSUBPD %YMM27,%YMM11,%YMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 |
VMULPD %YMM2,%YMM9,%YMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VCMPPD $0x1,%YMM10,%YMM12,%K2{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VANDPD %YMM14,%YMM9,%YMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
VANDPD %YMM2,%YMM14,%YMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
VMULPD %YMM1,%YMM9,%YMM22 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VFMADD231PD %YMM13,%YMM10,%YMM22 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM18,%YMM22,%YMM22 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VCMPPD $0x2,%YMM22,%YMM10,%K3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD %YMM10,%YMM22{%K3} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VCMPPD $0x2,%YMM12,%YMM2,%K3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VSUBPD %YMM5,%YMM15,%YMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 |
VXORPD %YMM17,%YMM2,%YMM2{%K3} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
VCMPPD $0x2,%YMM22,%YMM9,%K3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD %YMM9,%YMM22{%K3} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVAPD %YMM27,%YMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VFMADD231PD %YMM2,%YMM22,%YMM5{%K2} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVDQA 0x2a0(%RSP),%YMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VPADDQ 0x340(%RSP),%YMM2,%YMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 |
VMULPD %YMM31,%YMM5,%YMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 |
MOV 0x28(%RSP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
ADD 0x88(%RSP),%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VMOVUPD %YMM5,(%RAX,%RBX,8){%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 |
VPADDQ %YMM3,%YMM2,%YMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
KMOVQ %K1,%K2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 1 | 1 |
VXORPD %XMM9,%XMM9,%XMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VGATHERQPD (,%YMM3,1),%YMM9{%K2} | 24 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.75 | 1.42 | 1.42 | 1.42 | 3 | 3 | 0-16 | 4 |
VPADDQ %YMM7,%YMM2,%YMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
KMOVQ %K1,%K2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 1 | 1 |
VPXOR %XMM7,%XMM7,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VGATHERQPD (,%YMM3,1),%YMM7{%K2} | 24 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.75 | 1.42 | 1.42 | 1.42 | 3 | 3 | 0-16 | 4 |
VMOVAPD %YMM9,%YMM24{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVAPD %YMM7,%YMM25{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VSUBPD %YMM25,%YMM24,%YMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 |
VPADDQ %YMM2,%YMM8,%YMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
KMOVQ %K1,%K2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 1 | 1 |
VXORPD %XMM7,%XMM7,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VGATHERQPD (,%YMM2,1),%YMM7{%K2} | 24 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.75 | 1.42 | 1.42 | 1.42 | 3 | 3 | 0-16 | 4 |
VMOVAPD %YMM7,%YMM23{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VSUBPD %YMM24,%YMM23,%YMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 |
VMULPD %YMM3,%YMM2,%YMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VCMPPD $0x1,%YMM7,%YMM12,%K2{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VANDPD %YMM5,%YMM14,%YMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
VMULPD %YMM30,%YMM27,%YMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VDIVPD %YMM8,%YMM7,%YMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 13 | 5 |
VANDPD %YMM3,%YMM14,%YMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
VANDPD %YMM2,%YMM14,%YMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
VMULPD %YMM1,%YMM3,%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VFMADD231PD %YMM13,%YMM8,%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM18,%YMM1,%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VCMPPD $0x2,%YMM1,%YMM8,%K3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD %YMM8,%YMM1{%K3} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VCMPPD $0x2,%YMM12,%YMM2,%K3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VSUBPD %YMM7,%YMM15,%YMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 |
VXORPD %YMM17,%YMM2,%YMM2{%K3} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
VCMPPD $0x2,%YMM1,%YMM3,%K3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD %YMM3,%YMM1{%K3} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVAPD %YMM24,%YMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VFMADD231PD %YMM2,%YMM1,%YMM3{%K2} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM5,%YMM3,%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 |
ADD 0xa0(%RSP),%R14 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VMOVUPD %YMM1,(%R14,%RBX,8){%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 |
LEA 0x1(%R11),%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD %R12,%RDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD %R13,%R9 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD %R15,%R10 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CMP 0x20(%RSP),%R11 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
MOV %RAX,%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JE 4263e4 <advec_cell_kernel_module_mp_advec_cell_kernel_.DIR.OMP.PARALLEL.2+0x714> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
TEST %R8,%R8 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JE 428870 <advec_cell_kernel_module_mp_advec_cell_kernel_.DIR.OMP.PARALLEL.2+0x2ba0> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
VMOVAPS %YMM30,0x160(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 |
VMOVAPS %YMM29,0x1c0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 |
VMOVAPS %YMM28,0x1e0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 |
VMOVAPS %YMM26,0x100(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 |
VMOVAPD %YMM11,%YMM30 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVAPD %YMM25,%YMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVAPD %YMM23,%YMM29 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x1a0(%RSP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %R11,0x180(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
ADD %R11,%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SUB 0x18(%RSP),%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
IMUL %RAX,%R15 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV %R15,0x280(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV 0x30(%RSP),%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
IMUL %RAX,%RBX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ %RBX,%YMM13 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 1 |
MOV 0x8(%RSP),%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
IMUL %RAX,%R15 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ %R15,%YMM1 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 1 |
IMUL %RAX,%R13 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV %R13,0x28(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV (%RSP),%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
IMUL %RAX,%R14 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ %R14,%YMM2 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 1 |
VMOVDQA %YMM2,0x2a0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 |
IMUL %R12,%RAX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV %RAX,0x38(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV 0xb8(%RSP),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %R10,0x58(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV 0xb0(%RSP),%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x48(%RSP),%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x98(%RSP),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %R9,%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RDI,%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RDX,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x310(%RSP),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x318(%RSP),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x308(%RSP),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %R8,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP %R8,0x328(%RSP) | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
MOV 0x338(%RSP),%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x330(%RSP),%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x320(%RSP),%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %RDI,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R9,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R11,%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x58(%RSP),%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x180(%RSP),%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
VMOVAPD %YMM29,%YMM23 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVAPD %YMM11,%YMM25 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVAPD %YMM30,%YMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVAPD 0x100(%RSP),%YMM26 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVAPD 0x1e0(%RSP),%YMM28 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVAPD 0x1c0(%RSP),%YMM29 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVAPD 0x160(%RSP),%YMM30 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
MOV 0x38(%RSP),%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
JNE 4288d7 <advec_cell_kernel_module_mp_advec_cell_kernel_.DIR.OMP.PARALLEL.2+0x2c07> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |