Loop Id: 502 | Module: libkripke.so | Source: RangeSegment.hpp:120-120 [...] | Coverage: 0.02% |
---|
Loop Id: 502 | Module: libkripke.so | Source: RangeSegment.hpp:120-120 [...] | Coverage: 0.02% |
---|
0x54858 MOV 0x88(%RSP),%R10 |
0x54860 MOV 0x40(%RSP),%RDI |
0x54865 MOV 0x28(%RSP),%R8 |
0x5486a MOV 0x48(%RSP),%R11 |
0x5486f MOV 0x30(%RSP),%R9 |
0x54874 ADD %R10,%RDI |
0x54877 MOV 0x50(%RSP),%RAX |
0x5487c IMUL %RDI,%R11 |
0x54880 MOV 0x20(%RSP),%RCX |
0x54885 MOV 0x18(%RSP),%R12 |
0x5488a IMUL %R8,%R9 |
0x5488e MOV 0x38(%RSP),%RDX |
0x54893 IMUL %RAX,%RDI |
0x54897 ADD %R8,%RCX |
0x5489a MOV 0x60(%RSP),%R8 |
0x5489f LEA (%RDX,%RCX,8),%RSI |
0x548a3 ADD %R9,%R11 |
0x548a6 MOV 0x8(%RSP),%R9 |
0x548ab ADD %R12,%RDI |
0x548ae MOV 0x58(%RSP),%R12 |
0x548b3 ADD %R8,%R9 |
0x548b6 LEA (%RBX,%RDI,1),%R10 |
0x548ba LEA (%RDX,%R9,8),%R8 |
0x548be LEA (%R11,%RBX,1),%RDX |
0x548c2 ADD %R11,%R12 |
0x548c5 MOV %RDX,0xa8(%RSP) |
0x548cd LEA (%R15,%R10,8),%RCX |
0x548d1 LEA (%R14,%R12,8),%R10 |
0x548d5 MOV %R10,0xd0(%RSP) |
0x548dd LEA (%R14,%RDX,8),%RAX |
0x548e1 XOR %R10D,%R10D |
0x548e4 NOPL (%RAX) |
(501) 0x548e8 CMPQ $0x1,0xd8(%RSP) |
(501) 0x548f1 JE 54db8 |
(501) 0x548f7 MOV 0xd0(%RSP),%R9 |
(501) 0x548ff LEA -0x8(%R8),%RDX |
(501) 0x54903 CMP %R9,%RDX |
(501) 0x54906 SETAE %R12B |
(501) 0x5490a CMP %R8,%RAX |
(501) 0x5490d SETAE %DL |
(501) 0x54910 OR %R12B,%DL |
(501) 0x54913 JE 54bf8 |
(501) 0x54919 LEA 0x8(%RCX),%R9 |
(501) 0x5491d MOV %RAX,%R12 |
(501) 0x54920 SUB %R9,%R12 |
(501) 0x54923 CMP $0x10,%R12 |
(501) 0x54927 JBE 54bf8 |
(501) 0x5492d CMPQ $0x2,0x98(%RSP) |
(501) 0x54936 JBE 54dce |
(501) 0x5493c MOV 0xb8(%RSP),%R12 |
(501) 0x54944 VBROADCASTSD (%RSI),%YMM2 |
(501) 0x54949 XOR %EDX,%EDX |
(501) 0x5494b SUB $0x20,%R12 |
(501) 0x5494f SHR $0x5,%R12 |
(501) 0x54953 INC %R12 |
(501) 0x54956 AND $0x7,%R12D |
(501) 0x5495a JE 54a20 |
(501) 0x54960 CMP $0x1,%R12 |
(501) 0x54964 JE 549fe |
(501) 0x5496a CMP $0x2,%R12 |
(501) 0x5496e JE 549ea |
(501) 0x54970 CMP $0x3,%R12 |
(501) 0x54974 JE 549d6 |
(501) 0x54976 CMP $0x4,%R12 |
(501) 0x5497a JE 549c2 |
(501) 0x5497c CMP $0x5,%R12 |
(501) 0x54980 JE 549ae |
(501) 0x54982 CMP $0x6,%R12 |
(501) 0x54986 JE 5499a |
(501) 0x54988 VMOVUPD (%RCX),%YMM1 |
(501) 0x5498c VFMADD213PD (%RAX),%YMM2,%YMM1 |
(501) 0x54991 MOV $0x20,%EDX |
(501) 0x54996 VMOVUPD %YMM1,(%RAX) |
(501) 0x5499a VMOVUPD (%RCX,%RDX,1),%YMM3 |
(501) 0x5499f VFMADD213PD (%RAX,%RDX,1),%YMM2,%YMM3 |
(501) 0x549a5 VMOVUPD %YMM3,(%RAX,%RDX,1) |
(501) 0x549aa ADD $0x20,%RDX |
(501) 0x549ae VMOVUPD (%RCX,%RDX,1),%YMM5 |
(501) 0x549b3 VFMADD213PD (%RAX,%RDX,1),%YMM2,%YMM5 |
(501) 0x549b9 VMOVUPD %YMM5,(%RAX,%RDX,1) |
(501) 0x549be ADD $0x20,%RDX |
(501) 0x549c2 VMOVUPD (%RCX,%RDX,1),%YMM6 |
(501) 0x549c7 VFMADD213PD (%RAX,%RDX,1),%YMM2,%YMM6 |
(501) 0x549cd VMOVUPD %YMM6,(%RAX,%RDX,1) |
(501) 0x549d2 ADD $0x20,%RDX |
(501) 0x549d6 VMOVUPD (%RCX,%RDX,1),%YMM7 |
(501) 0x549db VFMADD213PD (%RAX,%RDX,1),%YMM2,%YMM7 |
(501) 0x549e1 VMOVUPD %YMM7,(%RAX,%RDX,1) |
(501) 0x549e6 ADD $0x20,%RDX |
(501) 0x549ea VMOVUPD (%RCX,%RDX,1),%YMM8 |
(501) 0x549ef VFMADD213PD (%RAX,%RDX,1),%YMM2,%YMM8 |
(501) 0x549f5 VMOVUPD %YMM8,(%RAX,%RDX,1) |
(501) 0x549fa ADD $0x20,%RDX |
(501) 0x549fe VMOVUPD (%RCX,%RDX,1),%YMM9 |
(501) 0x54a03 VFMADD213PD (%RAX,%RDX,1),%YMM2,%YMM9 |
(501) 0x54a09 VMOVUPD %YMM9,(%RAX,%RDX,1) |
(501) 0x54a0e ADD $0x20,%RDX |
(501) 0x54a12 CMP %RDX,0xb8(%RSP) |
(501) 0x54a1a JE 54aee |
(504) 0x54a20 VMOVUPD (%RCX,%RDX,1),%YMM10 |
(504) 0x54a25 VFMADD213PD (%RAX,%RDX,1),%YMM2,%YMM10 |
(504) 0x54a2b VMOVUPD %YMM10,(%RAX,%RDX,1) |
(504) 0x54a30 VMOVUPD 0x20(%RDX,%RCX,1),%YMM11 |
(504) 0x54a36 VFMADD213PD 0x20(%RAX,%RDX,1),%YMM2,%YMM11 |
(504) 0x54a3d VMOVUPD %YMM11,0x20(%RAX,%RDX,1) |
(504) 0x54a43 VMOVUPD 0x40(%RDX,%RCX,1),%YMM12 |
(504) 0x54a49 VFMADD213PD 0x40(%RAX,%RDX,1),%YMM2,%YMM12 |
(504) 0x54a50 VMOVUPD %YMM12,0x40(%RAX,%RDX,1) |
(504) 0x54a56 VMOVUPD 0x60(%RDX,%RCX,1),%YMM13 |
(504) 0x54a5c VFMADD213PD 0x60(%RAX,%RDX,1),%YMM2,%YMM13 |
(504) 0x54a63 VMOVUPD %YMM13,0x60(%RAX,%RDX,1) |
(504) 0x54a69 VMOVUPD 0x80(%RDX,%RCX,1),%YMM14 |
(504) 0x54a72 VFMADD213PD 0x80(%RAX,%RDX,1),%YMM2,%YMM14 |
(504) 0x54a7c VMOVUPD %YMM14,0x80(%RAX,%RDX,1) |
(504) 0x54a85 VMOVUPD 0xa0(%RDX,%RCX,1),%YMM15 |
(504) 0x54a8e VFMADD213PD 0xa0(%RAX,%RDX,1),%YMM2,%YMM15 |
(504) 0x54a98 VMOVUPD %YMM15,0xa0(%RAX,%RDX,1) |
(504) 0x54aa1 VMOVUPD 0xc0(%RDX,%RCX,1),%YMM0 |
(504) 0x54aaa VFMADD213PD 0xc0(%RAX,%RDX,1),%YMM2,%YMM0 |
(504) 0x54ab4 VMOVUPD %YMM0,0xc0(%RAX,%RDX,1) |
(504) 0x54abd VMOVUPD 0xe0(%RDX,%RCX,1),%YMM4 |
(504) 0x54ac6 VFMADD213PD 0xe0(%RAX,%RDX,1),%YMM2,%YMM4 |
(504) 0x54ad0 VMOVUPD %YMM4,0xe0(%RAX,%RDX,1) |
(504) 0x54ad9 ADD $0x100,%RDX |
(504) 0x54ae0 CMP %RDX,0xb8(%RSP) |
(504) 0x54ae8 JNE 54a20 |
(501) 0x54aee CMPQ $0,0x90(%RSP) |
(501) 0x54af7 JE 54b80 |
(501) 0x54afd MOV 0x78(%RSP),%R9 |
(501) 0x54b02 MOV %R9,%RDX |
(501) 0x54b05 CMP $0x1,%R9 |
(501) 0x54b09 JE 54dea |
(501) 0x54b0f MOV 0x80(%RSP),%R9 |
(501) 0x54b17 MOV %R9,0xa0(%RSP) |
(501) 0x54b1f MOV 0xa8(%RSP),%R12 |
(501) 0x54b27 VMOVDDUP (%RSI),%XMM2 |
(501) 0x54b2b ADD %R9,%R12 |
(501) 0x54b2e ADD %RBX,%R9 |
(501) 0x54b31 LEA (%R14,%R12,8),%R12 |
(501) 0x54b35 ADD %RDI,%R9 |
(501) 0x54b38 VMOVUPD (%R12),%XMM1 |
(501) 0x54b3e VFMADD132PD (%R15,%R9,8),%XMM1,%XMM2 |
(501) 0x54b44 VMOVUPD %XMM2,(%R12) |
(501) 0x54b4a TEST $0x1,%DL |
(501) 0x54b4d JE 54b80 |
(501) 0x54b4f MOV 0xa0(%RSP),%R9 |
(501) 0x54b57 AND $-0x2,%RDX |
(501) 0x54b5b ADD %R9,%RDX |
(501) 0x54b5e ADD %RBX,%RDX |
(501) 0x54b61 VMOVSD (%RSI),%XMM3 |
(501) 0x54b65 LEA (%R11,%RDX,1),%R12 |
(501) 0x54b69 ADD %RDI,%RDX |
(501) 0x54b6c LEA (%R14,%R12,8),%R9 |
(501) 0x54b70 VMOVSD (%R9),%XMM5 |
(501) 0x54b75 VFMADD132SD (%R15,%RDX,8),%XMM5,%XMM3 |
(501) 0x54b7b VMOVSD %XMM3,(%R9) |
(501) 0x54b80 MOV 0xc8(%RSP),%R9 |
(501) 0x54b88 INC %R10 |
(501) 0x54b8b ADD %R13,%RSI |
(501) 0x54b8e ADD %R13,%R8 |
(501) 0x54b91 MOV 0xb0(%RSP),%R12 |
(501) 0x54b99 MOV 0xc0(%RSP),%RDX |
(501) 0x54ba1 ADD %R9,%RDI |
(501) 0x54ba4 ADD %R12,%RCX |
(501) 0x54ba7 CMP %RDX,%R10 |
(501) 0x54baa JNE 548e8 |
0x54bb0 MOV 0x10(%RSP),%RDI |
0x54bb5 CMP %RDI,0x70(%RSP) |
0x54bba JE 54be1 |
0x54bbc INCQ 0x88(%RSP) |
0x54bc4 MOV 0x88(%RSP),%R11 |
0x54bcc CMP %R11,0x68(%RSP) |
0x54bd1 JLE 54df7 |
0x54bd7 INCQ 0x70(%RSP) |
0x54bdc JMP 54858 |
(501) 0x54bf8 MOV 0xd8(%RSP),%R12 |
(501) 0x54c00 XOR %EDX,%EDX |
(501) 0x54c02 AND $0x7,%R12D |
(501) 0x54c06 JE 54ce6 |
(501) 0x54c0c CMP $0x1,%R12 |
(501) 0x54c10 JE 54cc1 |
(501) 0x54c16 CMP $0x2,%R12 |
(501) 0x54c1a JE 54caa |
(501) 0x54c20 CMP $0x3,%R12 |
(501) 0x54c24 JE 54c93 |
(501) 0x54c26 CMP $0x4,%R12 |
(501) 0x54c2a JE 54c7c |
(501) 0x54c2c CMP $0x5,%R12 |
(501) 0x54c30 JE 54c65 |
(501) 0x54c32 CMP $0x6,%R12 |
(501) 0x54c36 JE 54c4e |
(501) 0x54c38 VMOVSD (%RCX),%XMM7 |
(501) 0x54c3c VMOVSD (%RAX),%XMM6 |
(501) 0x54c40 MOV $0x1,%EDX |
(501) 0x54c45 VFMADD132SD (%RSI),%XMM6,%XMM7 |
(501) 0x54c4a VMOVSD %XMM7,(%RAX) |
(501) 0x54c4e VMOVSD (%RCX,%RDX,8),%XMM8 |
(501) 0x54c53 VMOVSD (%RAX,%RDX,8),%XMM9 |
(501) 0x54c58 VFMADD132SD (%RSI),%XMM9,%XMM8 |
(501) 0x54c5d VMOVSD %XMM8,(%RAX,%RDX,8) |
(501) 0x54c62 INC %RDX |
(501) 0x54c65 VMOVSD (%RCX,%RDX,8),%XMM10 |
(501) 0x54c6a VMOVSD (%RAX,%RDX,8),%XMM11 |
(501) 0x54c6f VFMADD132SD (%RSI),%XMM11,%XMM10 |
(501) 0x54c74 VMOVSD %XMM10,(%RAX,%RDX,8) |
(501) 0x54c79 INC %RDX |
(501) 0x54c7c VMOVSD (%RCX,%RDX,8),%XMM12 |
(501) 0x54c81 VMOVSD (%RAX,%RDX,8),%XMM13 |
(501) 0x54c86 VFMADD132SD (%RSI),%XMM13,%XMM12 |
(501) 0x54c8b VMOVSD %XMM12,(%RAX,%RDX,8) |
(501) 0x54c90 INC %RDX |
(501) 0x54c93 VMOVSD (%RCX,%RDX,8),%XMM14 |
(501) 0x54c98 VMOVSD (%RAX,%RDX,8),%XMM15 |
(501) 0x54c9d VFMADD132SD (%RSI),%XMM15,%XMM14 |
(501) 0x54ca2 VMOVSD %XMM14,(%RAX,%RDX,8) |
(501) 0x54ca7 INC %RDX |
(501) 0x54caa VMOVSD (%RCX,%RDX,8),%XMM0 |
(501) 0x54caf VMOVSD (%RAX,%RDX,8),%XMM4 |
(501) 0x54cb4 VFMADD132SD (%RSI),%XMM4,%XMM0 |
(501) 0x54cb9 VMOVSD %XMM0,(%RAX,%RDX,8) |
(501) 0x54cbe INC %RDX |
(501) 0x54cc1 VMOVSD (%RCX,%RDX,8),%XMM2 |
(501) 0x54cc6 VMOVSD (%RAX,%RDX,8),%XMM1 |
(501) 0x54ccb VFMADD132SD (%RSI),%XMM1,%XMM2 |
(501) 0x54cd0 VMOVSD %XMM2,(%RAX,%RDX,8) |
(501) 0x54cd5 INC %RDX |
(501) 0x54cd8 CMP %RDX,0xd8(%RSP) |
(501) 0x54ce0 JE 54b80 |
(503) 0x54ce6 VMOVSD (%RCX,%RDX,8),%XMM3 |
(503) 0x54ceb VMOVSD (%RAX,%RDX,8),%XMM5 |
(503) 0x54cf0 VFMADD132SD (%RSI),%XMM5,%XMM3 |
(503) 0x54cf5 VMOVSD 0x8(%RAX,%RDX,8),%XMM6 |
(503) 0x54cfb VMOVSD 0x10(%RAX,%RDX,8),%XMM9 |
(503) 0x54d01 VMOVSD 0x18(%RAX,%RDX,8),%XMM11 |
(503) 0x54d07 VMOVSD 0x20(%RAX,%RDX,8),%XMM13 |
(503) 0x54d0d VMOVSD 0x28(%RAX,%RDX,8),%XMM15 |
(503) 0x54d13 VMOVSD 0x30(%RAX,%RDX,8),%XMM4 |
(503) 0x54d19 VMOVSD 0x38(%RAX,%RDX,8),%XMM1 |
(503) 0x54d1f VMOVSD %XMM3,(%RAX,%RDX,8) |
(503) 0x54d24 VMOVSD 0x8(%RCX,%RDX,8),%XMM7 |
(503) 0x54d2a VFMADD132SD (%RSI),%XMM6,%XMM7 |
(503) 0x54d2f VMOVSD %XMM7,0x8(%RAX,%RDX,8) |
(503) 0x54d35 VMOVSD 0x10(%RCX,%RDX,8),%XMM8 |
(503) 0x54d3b VFMADD132SD (%RSI),%XMM9,%XMM8 |
(503) 0x54d40 VMOVSD %XMM8,0x10(%RAX,%RDX,8) |
(503) 0x54d46 VMOVSD 0x18(%RCX,%RDX,8),%XMM10 |
(503) 0x54d4c VFMADD132SD (%RSI),%XMM11,%XMM10 |
(503) 0x54d51 VMOVSD %XMM10,0x18(%RAX,%RDX,8) |
(503) 0x54d57 VMOVSD 0x20(%RCX,%RDX,8),%XMM12 |
(503) 0x54d5d VFMADD132SD (%RSI),%XMM13,%XMM12 |
(503) 0x54d62 VMOVSD %XMM12,0x20(%RAX,%RDX,8) |
(503) 0x54d68 VMOVSD 0x28(%RCX,%RDX,8),%XMM14 |
(503) 0x54d6e VFMADD132SD (%RSI),%XMM15,%XMM14 |
(503) 0x54d73 VMOVSD %XMM14,0x28(%RAX,%RDX,8) |
(503) 0x54d79 VMOVSD 0x30(%RCX,%RDX,8),%XMM0 |
(503) 0x54d7f VFMADD132SD (%RSI),%XMM4,%XMM0 |
(503) 0x54d84 VMOVSD %XMM0,0x30(%RAX,%RDX,8) |
(503) 0x54d8a VMOVSD 0x38(%RCX,%RDX,8),%XMM2 |
(503) 0x54d90 VFMADD132SD (%RSI),%XMM1,%XMM2 |
(503) 0x54d95 VMOVSD %XMM2,0x38(%RAX,%RDX,8) |
(503) 0x54d9b ADD $0x8,%RDX |
(503) 0x54d9f CMP %RDX,0xd8(%RSP) |
(503) 0x54da7 JNE 54ce6 |
(501) 0x54dad JMP 54b80 |
(501) 0x54db8 VMOVSD (%RCX),%XMM0 |
(501) 0x54dbc VMOVSD (%RAX),%XMM4 |
(501) 0x54dc0 VFMADD132SD (%RSI),%XMM4,%XMM0 |
(501) 0x54dc5 VMOVSD %XMM0,(%RAX) |
(501) 0x54dc9 JMP 54b80 |
(501) 0x54dce MOVQ $0,0xa0(%RSP) |
(501) 0x54dda MOV 0xd8(%RSP),%RDX |
(501) 0x54de2 XOR %R9D,%R9D |
(501) 0x54de5 JMP 54b1f |
(501) 0x54dea MOV 0x80(%RSP),%RDX |
(501) 0x54df2 JMP 54b5e |
0x54df7 INCQ 0x60(%RSP) |
0x54dfc MOV (%RSP),%RCX |
0x54e00 MOV 0x60(%RSP),%RSI |
0x54e05 MOVQ $0,0x88(%RSP) |
0x54e11 ADD %RCX,%RSI |
0x54e14 MOV %RSI,0x28(%RSP) |
0x54e19 JMP 54bd7 |
/home/kcamus/qaas_runs/169-391-8990/intel/Kripke/build/Kripke/tpl/raja/include/RAJA/index/RangeSegment.hpp: 120 - 120 |
-------------------------------------------------------------------------------- |
120: RAJA_HOST_DEVICE RAJA_INLINE ~TypedRangeSegment() {} |
/home/kcamus/qaas_runs/169-391-8990/intel/Kripke/build/Kripke/tpl/raja/include/RAJA/util/Layout.hpp: 55 - 55 |
-------------------------------------------------------------------------------- |
55: return a * b; |
/home/kcamus/qaas_runs/169-391-8990/intel/Kripke/build/Kripke/tpl/raja/include/RAJA/policy/loop/forall.hpp: 59 - 59 |
-------------------------------------------------------------------------------- |
59: for (decltype(distance_it) i = 0; i < distance_it; ++i) { |
/home/kcamus/qaas_runs/169-391-8990/intel/Kripke/build/Kripke/tpl/raja/include/RAJA/internal/Iterators.hpp: 177 - 177 |
-------------------------------------------------------------------------------- |
177: return value_type(val + rhs); |
/home/kcamus/qaas_runs/169-391-8990/intel/Kripke/build/Kripke/tpl/raja/include/RAJA/util/Operators.hpp: 307 - 307 |
-------------------------------------------------------------------------------- |
307: return Ret{lhs} + rhs; |
/home/kcamus/qaas_runs/169-391-8990/intel/Kripke/build/Kripke/src/Kripke/Kernel/LTimes.cpp: 62 - 62 |
-------------------------------------------------------------------------------- |
62: phi(nm,g,z) += ell(nm, d) * psi(d, g, z); |
/home/kcamus/qaas_runs/169-391-8990/intel/Kripke/build/Kripke/tpl/raja/include/RAJA/policy/openmp/kernel/Collapse.hpp: 81 - 81 |
-------------------------------------------------------------------------------- |
81: #pragma omp parallel for private(i0, i1) firstprivate(privatizer) \ |
/home/kcamus/qaas_runs/169-391-8990/intel/Kripke/build/Kripke/tpl/raja/include/RAJA/util/View.hpp: 110 - 110 |
-------------------------------------------------------------------------------- |
110: return data[idx]; |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
○100.00 | GOMP_parallel | libgomp.h:985 | libgomp.so.1.0.0 |
Path / |
Metric | Value |
---|---|
CQA speedup if no scalar integer | 1.00 |
CQA speedup if FP arith vectorized | 1.00 |
CQA speedup if fully vectorized | 13.24 |
CQA speedup if no inter-iteration dependency | NA |
CQA speedup if next bottleneck killed | 1.29 |
Bottlenecks | micro-operation queue |
Function | void RAJA::internal::StatementExecutor |
Source | RangeSegment.hpp:120-120,Layout.hpp:55-55,forall.hpp:59-59,Iterators.hpp:177-177,Operators.hpp:307-307,LTimes.cpp:62-62,Collapse.hpp:81-81,View.hpp:110-110 |
Source loop unroll info | NA |
Source loop unroll confidence level | NA |
Unroll/vectorization loop type | NA |
Unroll factor | NA |
CQA cycles | 13.50 |
CQA cycles if no scalar integer | 13.50 |
CQA cycles if FP arith vectorized | 13.50 |
CQA cycles if fully vectorized | 1.02 |
Front-end cycles | 13.50 |
DIV/SQRT cycles | 6.50 |
P0 cycles | 9.50 |
P1 cycles | 10.50 |
P2 cycles | 10.50 |
P3 cycles | 7.00 |
P4 cycles | 6.50 |
P5 cycles | 6.50 |
P6 cycles | 7.00 |
P7 cycles | 0.00 |
Inter-iter dependencies cycles | NA |
FE+BE cycles (UFS) | 12.91 |
Stall cycles (UFS) | 0.00 |
Nb insns | 48.00 |
Nb uops | 54.00 |
Nb loads | 21.00 |
Nb stores | 7.00 |
Nb stack references | 18.00 |
FLOP/cycle | 0.00 |
Nb FLOP add-sub | 0.00 |
Nb FLOP mul | 0.00 |
Nb FLOP fma | 0.00 |
Nb FLOP div | 0.00 |
Nb FLOP rcp | 0.00 |
Nb FLOP sqrt | 0.00 |
Nb FLOP rsqrt | 0.00 |
Bytes/cycle | 16.59 |
Bytes prefetched | 0.00 |
Bytes loaded | 168.00 |
Bytes stored | 56.00 |
Stride 0 | NA |
Stride 1 | NA |
Stride n | NA |
Stride unknown | NA |
Stride indirect | NA |
Vectorization ratio all | 0.00 |
Vectorization ratio load | 0.00 |
Vectorization ratio store | 0.00 |
Vectorization ratio mul | NA |
Vectorization ratio add_sub | NA |
Vectorization ratio fma | NA |
Vectorization ratio div_sqrt | NA |
Vectorization ratio other | 0.00 |
Vector-efficiency ratio all | 11.93 |
Vector-efficiency ratio load | 12.50 |
Vector-efficiency ratio store | 11.61 |
Vector-efficiency ratio mul | NA |
Vector-efficiency ratio add_sub | NA |
Vector-efficiency ratio fma | NA |
Vector-efficiency ratio div_sqrt | NA |
Vector-efficiency ratio other | 12.50 |
Metric | Value |
---|---|
CQA speedup if no scalar integer | 1.00 |
CQA speedup if FP arith vectorized | 1.00 |
CQA speedup if fully vectorized | 13.24 |
CQA speedup if no inter-iteration dependency | NA |
CQA speedup if next bottleneck killed | 1.29 |
Bottlenecks | micro-operation queue |
Function | void RAJA::internal::StatementExecutor |
Source | RangeSegment.hpp:120-120,Layout.hpp:55-55,forall.hpp:59-59,Iterators.hpp:177-177,Operators.hpp:307-307,LTimes.cpp:62-62,Collapse.hpp:81-81,View.hpp:110-110 |
Source loop unroll info | NA |
Source loop unroll confidence level | NA |
Unroll/vectorization loop type | NA |
Unroll factor | NA |
CQA cycles | 13.50 |
CQA cycles if no scalar integer | 13.50 |
CQA cycles if FP arith vectorized | 13.50 |
CQA cycles if fully vectorized | 1.02 |
Front-end cycles | 13.50 |
DIV/SQRT cycles | 6.50 |
P0 cycles | 9.50 |
P1 cycles | 10.50 |
P2 cycles | 10.50 |
P3 cycles | 7.00 |
P4 cycles | 6.50 |
P5 cycles | 6.50 |
P6 cycles | 7.00 |
P7 cycles | 0.00 |
Inter-iter dependencies cycles | NA |
FE+BE cycles (UFS) | 12.91 |
Stall cycles (UFS) | 0.00 |
Nb insns | 48.00 |
Nb uops | 54.00 |
Nb loads | 21.00 |
Nb stores | 7.00 |
Nb stack references | 18.00 |
FLOP/cycle | 0.00 |
Nb FLOP add-sub | 0.00 |
Nb FLOP mul | 0.00 |
Nb FLOP fma | 0.00 |
Nb FLOP div | 0.00 |
Nb FLOP rcp | 0.00 |
Nb FLOP sqrt | 0.00 |
Nb FLOP rsqrt | 0.00 |
Bytes/cycle | 16.59 |
Bytes prefetched | 0.00 |
Bytes loaded | 168.00 |
Bytes stored | 56.00 |
Stride 0 | NA |
Stride 1 | NA |
Stride n | NA |
Stride unknown | NA |
Stride indirect | NA |
Vectorization ratio all | 0.00 |
Vectorization ratio load | 0.00 |
Vectorization ratio store | 0.00 |
Vectorization ratio mul | NA |
Vectorization ratio add_sub | NA |
Vectorization ratio fma | NA |
Vectorization ratio div_sqrt | NA |
Vectorization ratio other | 0.00 |
Vector-efficiency ratio all | 11.93 |
Vector-efficiency ratio load | 12.50 |
Vector-efficiency ratio store | 11.61 |
Vector-efficiency ratio mul | NA |
Vector-efficiency ratio add_sub | NA |
Vector-efficiency ratio fma | NA |
Vector-efficiency ratio div_sqrt | NA |
Vector-efficiency ratio other | 12.50 |
Path / |
nb instructions | 48 |
nb uops | 54 |
loop length | 232 |
used x86 registers | 14 |
used mmx registers | 0 |
used xmm registers | 0 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 18 |
micro-operation queue | 13.50 cycles |
front end | 13.50 cycles |
 | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 |
---|---|---|---|---|---|---|---|---|
uops | 6.50 | 6.50 | 10.50 | 10.50 | 7.00 | 6.50 | 6.50 | 7.00 |
cycles | 6.50 | 9.50 | 10.50 | 10.50 | 7.00 | 6.50 | 6.50 | 7.00 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 12.91 |
Stall cycles | 0.00 |
Front-end | 13.50 |
Dispatch | 10.50 |
Overall L1 | 13.50 |
Vectorization ratio |
all | 0% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
Vector-efficiency ratio |
all | 11% |
load | 12% |
store | 11% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 12% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|
MOV 0x88(%RSP),%R10 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x40(%RSP),%RDI | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x28(%RSP),%R8 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x48(%RSP),%R11 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x30(%RSP),%R9 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
ADD %R10,%RDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV 0x50(%RSP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
IMUL %RDI,%R11 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV 0x20(%RSP),%RCX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x18(%RSP),%R12 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
IMUL %R8,%R9 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV 0x38(%RSP),%RDX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
IMUL %RAX,%RDI | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
ADD %R8,%RCX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV 0x60(%RSP),%R8 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
LEA (%RDX,%RCX,8),%RSI | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
ADD %R9,%R11 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV 0x8(%RSP),%R9 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
ADD %R12,%RDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV 0x58(%RSP),%R12 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
ADD %R8,%R9 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
LEA (%RBX,%RDI,1),%R10 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA (%RDX,%R9,8),%R8 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA (%R11,%RBX,1),%RDX | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
ADD %R11,%R12 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV %RDX,0xa8(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
LEA (%R15,%R10,8),%RCX | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA (%R14,%R12,8),%R10 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R10,0xd0(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
LEA (%R14,%RDX,8),%RAX | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
XOR %R10D,%R10D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV 0x10(%RSP),%RDI | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
CMP %RDI,0x70(%RSP) | 1 | 0.25 | 0.25 | 0.50 | 0.50 | 0 | 0.25 | 0.25 | 0 | 1 | 0.50 |
JE 54be1 <_ZN4RAJA8internal17StatementExecutorINS_9statement8CollapseINS_26omp_parallel_collapse_execEN4camp7int_seqIlJLl0ELl2EEEEJNS2_3ForILl1ENS_6policy4loop9loop_execEJNS8_ILl3ESB_JNS2_6LambdaILl0EJEEEEEEEEEEEEE4execIRNS0_8LoopDataINS5_4listIJSG_EEENS5_5tupleIJNS_4impl4SpanINS_9Iterators16numeric_iteratorIN6Kripke6MomentElPSS_EESS_EENSO_INSQ_INSR_9DirectionElPSW_EESW_EENSO_INSQ_INSR_5GroupElPS10_EES10_EENSO_INSQ_INSR_4ZoneElPS14_EES14_EEEEENSM_IJEEEJZNK10LTimesSdomclINSR_11ArchLayoutTINSR_12ArchT_OpenMPENSR_11LayoutT_DGZEEEEEvT_NSR_6SdomIdERKNSR_4Core3SetES1L_S1L_S1L_RNS1I_5FieldIdJSW_S10_S14_EEERNS1M_IdJSS_S10_S14_EEERNS1M_IdJSS_SW_EEEEUlSS_SW_S10_S14_E_EEEEEvOS1G_._omp_fn.0+0x591> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
INCQ 0x88(%RSP) | 3 | 0.25 | 0.25 | 0.83 | 0.83 | 1 | 0.25 | 0.25 | 0.33 | 5 | 1 |
MOV 0x88(%RSP),%R11 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
CMP %R11,0x68(%RSP) | 1 | 0.25 | 0.25 | 0.50 | 0.50 | 0 | 0.25 | 0.25 | 0 | 1 | 0.50 |
JLE 54df7 <_ZN4RAJA8internal17StatementExecutorINS_9statement8CollapseINS_26omp_parallel_collapse_execEN4camp7int_seqIlJLl0ELl2EEEEJNS2_3ForILl1ENS_6policy4loop9loop_execEJNS8_ILl3ESB_JNS2_6LambdaILl0EJEEEEEEEEEEEEE4execIRNS0_8LoopDataINS5_4listIJSG_EEENS5_5tupleIJNS_4impl4SpanINS_9Iterators16numeric_iteratorIN6Kripke6MomentElPSS_EESS_EENSO_INSQ_INSR_9DirectionElPSW_EESW_EENSO_INSQ_INSR_5GroupElPS10_EES10_EENSO_INSQ_INSR_4ZoneElPS14_EES14_EEEEENSM_IJEEEJZNK10LTimesSdomclINSR_11ArchLayoutTINSR_12ArchT_OpenMPENSR_11LayoutT_DGZEEEEEvT_NSR_6SdomIdERKNSR_4Core3SetES1L_S1L_S1L_RNS1I_5FieldIdJSW_S10_S14_EEERNS1M_IdJSS_S10_S14_EEERNS1M_IdJSS_SW_EEEEUlSS_SW_S10_S14_E_EEEEEvOS1G_._omp_fn.0+0x7a7> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
INCQ 0x70(%RSP) | 3 | 0.25 | 0.25 | 0.83 | 0.83 | 1 | 0.25 | 0.25 | 0.33 | 5 | 1 |
JMP 54858 <_ZN4RAJA8internal17StatementExecutorINS_9statement8CollapseINS_26omp_parallel_collapse_execEN4camp7int_seqIlJLl0ELl2EEEEJNS2_3ForILl1ENS_6policy4loop9loop_execEJNS8_ILl3ESB_JNS2_6LambdaILl0EJEEEEEEEEEEEEE4execIRNS0_8LoopDataINS5_4listIJSG_EEENS5_5tupleIJNS_4impl4SpanINS_9Iterators16numeric_iteratorIN6Kripke6MomentElPSS_EESS_EENSO_INSQ_INSR_9DirectionElPSW_EESW_EENSO_INSQ_INSR_5GroupElPS10_EES10_EENSO_INSQ_INSR_4ZoneElPS14_EES14_EEEEENSM_IJEEEJZNK10LTimesSdomclINSR_11ArchLayoutTINSR_12ArchT_OpenMPENSR_11LayoutT_DGZEEEEEvT_NSR_6SdomIdERKNSR_4Core3SetES1L_S1L_S1L_RNS1I_5FieldIdJSW_S10_S14_EEERNS1M_IdJSS_S10_S14_EEERNS1M_IdJSS_SW_EEEEUlSS_SW_S10_S14_E_EEEEEvOS1G_._omp_fn.0+0x208> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
INCQ 0x60(%RSP) | 3 | 0.25 | 0.25 | 0.83 | 0.83 | 1 | 0.25 | 0.25 | 0.33 | 5 | 1 |
MOV (%RSP),%RCX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x60(%RSP),%RSI | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOVQ $0,0x88(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 2 | 1 |
ADD %RCX,%RSI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV %RSI,0x28(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
JMP 54bd7 <_ZN4RAJA8internal17StatementExecutorINS_9statement8CollapseINS_26omp_parallel_collapse_execEN4camp7int_seqIlJLl0ELl2EEEEJNS2_3ForILl1ENS_6policy4loop9loop_execEJNS8_ILl3ESB_JNS2_6LambdaILl0EJEEEEEEEEEEEEE4execIRNS0_8LoopDataINS5_4listIJSG_EEENS5_5tupleIJNS_4impl4SpanINS_9Iterators16numeric_iteratorIN6Kripke6MomentElPSS_EESS_EENSO_INSQ_INSR_9DirectionElPSW_EESW_EENSO_INSQ_INSR_5GroupElPS10_EES10_EENSO_INSQ_INSR_4ZoneElPS14_EES14_EEEEENSM_IJEEEJZNK10LTimesSdomclINSR_11ArchLayoutTINSR_12ArchT_OpenMPENSR_11LayoutT_DGZEEEEEvT_NSR_6SdomIdERKNSR_4Core3SetES1L_S1L_S1L_RNS1I_5FieldIdJSW_S10_S14_EEERNS1M_IdJSS_S10_S14_EEERNS1M_IdJSS_SW_EEEEUlSS_SW_S10_S14_E_EEEEEvOS1G_._omp_fn.0+0x587> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
nb instructions | 48 |
nb uops | 54 |
loop length | 232 |
used x86 registers | 14 |
used mmx registers | 0 |
used xmm registers | 0 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 18 |
micro-operation queue | 13.50 cycles |
front end | 13.50 cycles |
 | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 |
---|---|---|---|---|---|---|---|---|
uops | 6.50 | 6.50 | 10.50 | 10.50 | 7.00 | 6.50 | 6.50 | 7.00 |
cycles | 6.50 | 9.50 | 10.50 | 10.50 | 7.00 | 6.50 | 6.50 | 7.00 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 12.91 |
Stall cycles | 0.00 |
Front-end | 13.50 |
Dispatch | 10.50 |
Overall L1 | 13.50 |
Vectorization ratio |
all | 0% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
Vector-efficiency ratio |
all | 11% |
load | 12% |
store | 11% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 12% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|
MOV 0x88(%RSP),%R10 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x40(%RSP),%RDI | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x28(%RSP),%R8 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x48(%RSP),%R11 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x30(%RSP),%R9 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
ADD %R10,%RDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV 0x50(%RSP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
IMUL %RDI,%R11 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV 0x20(%RSP),%RCX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x18(%RSP),%R12 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
IMUL %R8,%R9 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV 0x38(%RSP),%RDX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
IMUL %RAX,%RDI | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
ADD %R8,%RCX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV 0x60(%RSP),%R8 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
LEA (%RDX,%RCX,8),%RSI | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
ADD %R9,%R11 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV 0x8(%RSP),%R9 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
ADD %R12,%RDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV 0x58(%RSP),%R12 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
ADD %R8,%R9 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
LEA (%RBX,%RDI,1),%R10 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA (%RDX,%R9,8),%R8 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA (%R11,%RBX,1),%RDX | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
ADD %R11,%R12 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV %RDX,0xa8(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
LEA (%R15,%R10,8),%RCX | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA (%R14,%R12,8),%R10 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R10,0xd0(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
LEA (%R14,%RDX,8),%RAX | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
XOR %R10D,%R10D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV 0x10(%RSP),%RDI | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
CMP %RDI,0x70(%RSP) | 1 | 0.25 | 0.25 | 0.50 | 0.50 | 0 | 0.25 | 0.25 | 0 | 1 | 0.50 |
JE 54be1 <_ZN4RAJA8internal17StatementExecutorINS_9statement8CollapseINS_26omp_parallel_collapse_execEN4camp7int_seqIlJLl0ELl2EEEEJNS2_3ForILl1ENS_6policy4loop9loop_execEJNS8_ILl3ESB_JNS2_6LambdaILl0EJEEEEEEEEEEEEE4execIRNS0_8LoopDataINS5_4listIJSG_EEENS5_5tupleIJNS_4impl4SpanINS_9Iterators16numeric_iteratorIN6Kripke6MomentElPSS_EESS_EENSO_INSQ_INSR_9DirectionElPSW_EESW_EENSO_INSQ_INSR_5GroupElPS10_EES10_EENSO_INSQ_INSR_4ZoneElPS14_EES14_EEEEENSM_IJEEEJZNK10LTimesSdomclINSR_11ArchLayoutTINSR_12ArchT_OpenMPENSR_11LayoutT_DGZEEEEEvT_NSR_6SdomIdERKNSR_4Core3SetES1L_S1L_S1L_RNS1I_5FieldIdJSW_S10_S14_EEERNS1M_IdJSS_S10_S14_EEERNS1M_IdJSS_SW_EEEEUlSS_SW_S10_S14_E_EEEEEvOS1G_._omp_fn.0+0x591> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
INCQ 0x88(%RSP) | 3 | 0.25 | 0.25 | 0.83 | 0.83 | 1 | 0.25 | 0.25 | 0.33 | 5 | 1 |
MOV 0x88(%RSP),%R11 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
CMP %R11,0x68(%RSP) | 1 | 0.25 | 0.25 | 0.50 | 0.50 | 0 | 0.25 | 0.25 | 0 | 1 | 0.50 |
JLE 54df7 <_ZN4RAJA8internal17StatementExecutorINS_9statement8CollapseINS_26omp_parallel_collapse_execEN4camp7int_seqIlJLl0ELl2EEEEJNS2_3ForILl1ENS_6policy4loop9loop_execEJNS8_ILl3ESB_JNS2_6LambdaILl0EJEEEEEEEEEEEEE4execIRNS0_8LoopDataINS5_4listIJSG_EEENS5_5tupleIJNS_4impl4SpanINS_9Iterators16numeric_iteratorIN6Kripke6MomentElPSS_EESS_EENSO_INSQ_INSR_9DirectionElPSW_EESW_EENSO_INSQ_INSR_5GroupElPS10_EES10_EENSO_INSQ_INSR_4ZoneElPS14_EES14_EEEEENSM_IJEEEJZNK10LTimesSdomclINSR_11ArchLayoutTINSR_12ArchT_OpenMPENSR_11LayoutT_DGZEEEEEvT_NSR_6SdomIdERKNSR_4Core3SetES1L_S1L_S1L_RNS1I_5FieldIdJSW_S10_S14_EEERNS1M_IdJSS_S10_S14_EEERNS1M_IdJSS_SW_EEEEUlSS_SW_S10_S14_E_EEEEEvOS1G_._omp_fn.0+0x7a7> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
INCQ 0x70(%RSP) | 3 | 0.25 | 0.25 | 0.83 | 0.83 | 1 | 0.25 | 0.25 | 0.33 | 5 | 1 |
JMP 54858 <_ZN4RAJA8internal17StatementExecutorINS_9statement8CollapseINS_26omp_parallel_collapse_execEN4camp7int_seqIlJLl0ELl2EEEEJNS2_3ForILl1ENS_6policy4loop9loop_execEJNS8_ILl3ESB_JNS2_6LambdaILl0EJEEEEEEEEEEEEE4execIRNS0_8LoopDataINS5_4listIJSG_EEENS5_5tupleIJNS_4impl4SpanINS_9Iterators16numeric_iteratorIN6Kripke6MomentElPSS_EESS_EENSO_INSQ_INSR_9DirectionElPSW_EESW_EENSO_INSQ_INSR_5GroupElPS10_EES10_EENSO_INSQ_INSR_4ZoneElPS14_EES14_EEEEENSM_IJEEEJZNK10LTimesSdomclINSR_11ArchLayoutTINSR_12ArchT_OpenMPENSR_11LayoutT_DGZEEEEEvT_NSR_6SdomIdERKNSR_4Core3SetES1L_S1L_S1L_RNS1I_5FieldIdJSW_S10_S14_EEERNS1M_IdJSS_S10_S14_EEERNS1M_IdJSS_SW_EEEEUlSS_SW_S10_S14_E_EEEEEvOS1G_._omp_fn.0+0x208> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
INCQ 0x60(%RSP) | 3 | 0.25 | 0.25 | 0.83 | 0.83 | 1 | 0.25 | 0.25 | 0.33 | 5 | 1 |
MOV (%RSP),%RCX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x60(%RSP),%RSI | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOVQ $0,0x88(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 2 | 1 |
ADD %RCX,%RSI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV %RSI,0x28(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
JMP 54bd7 <_ZN4RAJA8internal17StatementExecutorINS_9statement8CollapseINS_26omp_parallel_collapse_execEN4camp7int_seqIlJLl0ELl2EEEEJNS2_3ForILl1ENS_6policy4loop9loop_execEJNS8_ILl3ESB_JNS2_6LambdaILl0EJEEEEEEEEEEEEE4execIRNS0_8LoopDataINS5_4listIJSG_EEENS5_5tupleIJNS_4impl4SpanINS_9Iterators16numeric_iteratorIN6Kripke6MomentElPSS_EESS_EENSO_INSQ_INSR_9DirectionElPSW_EESW_EENSO_INSQ_INSR_5GroupElPS10_EES10_EENSO_INSQ_INSR_4ZoneElPS14_EES14_EEEEENSM_IJEEEJZNK10LTimesSdomclINSR_11ArchLayoutTINSR_12ArchT_OpenMPENSR_11LayoutT_DGZEEEEEvT_NSR_6SdomIdERKNSR_4Core3SetES1L_S1L_S1L_RNS1I_5FieldIdJSW_S10_S14_EEERNS1M_IdJSS_S10_S14_EEERNS1M_IdJSS_SW_EEEEUlSS_SW_S10_S14_E_EEEEEvOS1G_._omp_fn.0+0x587> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |