Function: clover_unpack_message_left_.DIR.OMP.PARALLEL.LOOP.2.split99 | Module: exec | Source: pack_kernel.f90:108-116 | Coverage: 0.01% |
---|
Function: clover_unpack_message_left_.DIR.OMP.PARALLEL.LOOP.2.split99 | Module: exec | Source: pack_kernel.f90:108-116 | Coverage: 0.01% |
---|
/beegfs/hackathon/users/eoseret/qaas_runs/170-861-0321/intel/CloverLeafFC/build/CloverLeafFC/CloverLeaf_ref/kernels/pack_kernel.f90: 108 - 116 |
-------------------------------------------------------------------------------- |
108: !$OMP PARALLEL DO PRIVATE(index) |
109: DO k=y_min-depth,y_max+y_inc+depth |
110: !$OMP SIMD |
111: DO j=1,depth |
112: index= buffer_offset + j+(k+depth-1)*depth |
113: field(x_min-j,k)=left_rcv_buffer(index) |
114: ENDDO |
115: ENDDO |
116: !$OMP END PARALLEL DO |
0x4417b0 PUSH %RBP |
0x4417b1 MOV %RSP,%RBP |
0x4417b4 PUSH %R15 |
0x4417b6 PUSH %R14 |
0x4417b8 PUSH %R13 |
0x4417ba PUSH %R12 |
0x4417bc PUSH %RBX |
0x4417bd SUB $0x68,%RSP |
0x4417c1 MOV %R8,-0x50(%RBP) |
0x4417c5 MOV %RCX,-0x60(%RBP) |
0x4417c9 MOV 0x28(%RBP),%EAX |
0x4417cc MOVL $0,-0x44(%RBP) |
0x4417d3 TEST %EAX,%EAX |
0x4417d5 JS 441833 |
0x4417d7 MOV %RDX,%R12 |
0x4417da MOV (%RDI),%ESI |
0x4417dc MOVL $0,-0x34(%RBP) |
0x4417e3 MOV %EAX,-0x30(%RBP) |
0x4417e6 MOVL $0x1,-0x40(%RBP) |
0x4417ed SUB $0x8,%RSP |
0x4417f1 LEA -0x40(%RBP),%RAX |
0x4417f5 LEA -0x44(%RBP),%RCX |
0x4417f9 LEA -0x34(%RBP),%R8 |
0x4417fd LEA -0x30(%RBP),%R9 |
0x441801 MOV $0x573540,%EDI |
0x441806 MOV %ESI,-0x38(%RBP) |
0x441809 MOV $0x22,%EDX |
0x44180e PUSH $0x1 |
0x441810 PUSH $0x1 |
0x441812 PUSH %RAX |
0x441813 CALL 404670 <__kmpc_for_static_init_4@plt> |
0x441818 ADD $0x20,%RSP |
0x44181c MOV -0x34(%RBP),%ECX |
0x44181f MOV -0x30(%RBP),%EDX |
0x441822 SUB %ECX,%EDX |
0x441824 JAE 441842 |
0x441826 MOV $0x573560,%EDI |
0x44182b MOV -0x38(%RBP),%ESI |
0x44182e CALL 404230 <__kmpc_for_static_fini@plt> |
0x441833 ADD $0x68,%RSP |
0x441837 POP %RBX |
0x441838 POP %R12 |
0x44183a POP %R13 |
0x44183c POP %R14 |
0x44183e POP %R15 |
0x441840 POP %RBP |
0x441841 RET |
0x441842 MOV -0x50(%RBP),%RAX |
0x441846 MOV (%RAX),%EAX |
0x441848 LEA (%RCX,%R12,1),%ESI |
0x44184c LEA (%RCX,%R12,1),%EDI |
0x441850 DEC %EDI |
0x441852 XOR %R9D,%R9D |
0x441855 MOVDQA 0xebe93(%RIP),%XMM0 |
0x44185d MOVDQA 0xebdeb(%RIP),%XMM1 |
0x441865 MOVDQA 0xebe93(%RIP),%XMM2 |
0x44186d MOVDQA 0xebe9b(%RIP),%XMM3 |
0x441875 MOV %RSI,-0x58(%RBP) |
0x441879 MOV %EDX,-0x3c(%RBP) |
0x44187c JMP 441896 |
0x44187e XCHG %AX,%AX |
(435) 0x441880 MOV %R11D,%EAX |
(435) 0x441883 MOV -0x2c(%RBP),%ESI |
(435) 0x441886 LEA 0x1(%R9),%ECX |
(435) 0x44188a INC %EDI |
(435) 0x44188c INC %ESI |
(435) 0x44188e CMP %EDX,%R9D |
(435) 0x441891 MOV %ECX,%R9D |
(435) 0x441894 JE 441826 |
(435) 0x441896 TEST %EAX,%EAX |
(435) 0x441898 JLE 441886 |
(435) 0x44189a MOV %ESI,-0x2c(%RBP) |
(435) 0x44189d MOV -0x60(%RBP),%RCX |
(435) 0x4418a1 MOVSXD (%RCX),%R14 |
(435) 0x4418a4 MOV -0x50(%RBP),%RCX |
(435) 0x4418a8 MOV (%RCX),%R11D |
(435) 0x4418ab MOV 0x1372f6(%RIP),%R12 |
(435) 0x4418b2 MOV 0x137327(%RIP),%R13 |
(435) 0x4418b9 MOV 0x10(%RBP),%R8 |
(435) 0x4418bd MOV (%R8),%R15 |
(435) 0x4418c0 MOV 0x38(%R8),%RCX |
(435) 0x4418c4 MOV 0x18(%RBP),%RSI |
(435) 0x4418c8 MOVSXD (%RSI),%RSI |
(435) 0x4418cb MOV 0x50(%R8),%R10 |
(435) 0x4418cf MOV %EAX,%R8D |
(435) 0x4418d2 MOV %R8,%RBX |
(435) 0x4418d5 MOV $-0x4,%EAX |
(435) 0x4418da AND %RAX,%RBX |
(435) 0x4418dd MOV %R10,-0x88(%RBP) |
(435) 0x4418e4 JE 441b10 |
(435) 0x4418ea MOV -0x58(%RBP),%RAX |
(435) 0x4418ee MOV %R9,-0x80(%RBP) |
(435) 0x4418f2 LEA (%RAX,%R9,1),%EDX |
(435) 0x4418f6 LEA (%R11,%RDI,1),%EAX |
(435) 0x4418fa MOV %R11,-0x78(%RBP) |
(435) 0x4418fe IMUL %R11D,%EAX |
(435) 0x441902 CLTQ |
(435) 0x441904 MOV %R14,-0x70(%RBP) |
(435) 0x441908 ADD %R14,%RAX |
(435) 0x44190b MOVQ %R12,%XMM4 |
(435) 0x441910 PSHUFD $0x44,%XMM4,%XMM4 |
(435) 0x441915 MOVQ %R13,%XMM5 |
(435) 0x44191a PSHUFD $0x44,%XMM5,%XMM5 |
(435) 0x44191f MOVDQA %XMM5,%XMM6 |
(435) 0x441923 PSRLQ $0x20,%XMM6 |
(435) 0x441928 MOVQ %R15,%XMM7 |
(435) 0x44192d MOVSXD %EDX,%RDX |
(435) 0x441930 INC %RDX |
(435) 0x441933 IMUL %R10,%RDX |
(435) 0x441937 MOVQ %RDX,%XMM8 |
(435) 0x44193c PADDQ %XMM7,%XMM8 |
(435) 0x441941 PSHUFD $0x44,%XMM8,%XMM7 |
(435) 0x441947 MOVQ %RCX,%XMM8 |
(435) 0x44194c PSHUFD $0x44,%XMM8,%XMM8 |
(435) 0x441952 MOVDQA %XMM8,%XMM9 |
(435) 0x441957 PSRLQ $0x20,%XMM9 |
(435) 0x44195d MOV %RSI,-0x68(%RBP) |
(435) 0x441961 MOV %RSI,%R14 |
(435) 0x441964 XOR %EDX,%EDX |
(435) 0x441966 NOPW %CS:(%RAX,%RAX,1) |
(437) 0x441970 LEA (%RAX,%RDX,1),%RSI |
(437) 0x441974 MOVQ %RSI,%XMM10 |
(437) 0x441979 PSHUFD $0x44,%XMM10,%XMM10 |
(437) 0x44197f MOVDQA %XMM10,%XMM11 |
(437) 0x441984 PADDQ %XMM0,%XMM11 |
(437) 0x441989 PADDQ %XMM1,%XMM10 |
(437) 0x44198e MOVDQA %XMM6,%XMM12 |
(437) 0x441993 PMULUDQ %XMM10,%XMM12 |
(437) 0x441998 MOVDQA %XMM5,%XMM13 |
(437) 0x44199d PMULUDQ %XMM10,%XMM13 |
(437) 0x4419a2 PSRLQ $0x20,%XMM10 |
(437) 0x4419a8 PMULUDQ %XMM5,%XMM10 |
(437) 0x4419ad PADDQ %XMM12,%XMM10 |
(437) 0x4419b2 PSLLQ $0x20,%XMM10 |
(437) 0x4419b8 MOVDQA %XMM6,%XMM12 |
(437) 0x4419bd PMULUDQ %XMM11,%XMM12 |
(437) 0x4419c2 MOVDQA %XMM5,%XMM14 |
(437) 0x4419c7 PMULUDQ %XMM11,%XMM14 |
(437) 0x4419cc PSRLQ $0x20,%XMM11 |
(437) 0x4419d2 PMULUDQ %XMM5,%XMM11 |
(437) 0x4419d7 PADDQ %XMM12,%XMM11 |
(437) 0x4419dc PSLLQ $0x20,%XMM11 |
(437) 0x4419e2 PADDQ %XMM4,%XMM14 |
(437) 0x4419e7 PADDQ %XMM11,%XMM14 |
(437) 0x4419ec PADDQ %XMM4,%XMM13 |
(437) 0x4419f1 PADDQ %XMM10,%XMM13 |
(437) 0x4419f6 MOVQ %XMM13,%RSI |
(437) 0x4419fb MOVSD (%RSI),%XMM10 |
(437) 0x441a00 PSHUFD $-0x12,%XMM13,%XMM11 |
(437) 0x441a06 MOVQ %XMM11,%R9 |
(437) 0x441a0b MOVQ %XMM14,%RSI |
(437) 0x441a10 PSHUFD $-0x12,%XMM14,%XMM11 |
(437) 0x441a16 MOVQ %XMM11,%R11 |
(437) 0x441a1b MOVQ %R14,%XMM11 |
(437) 0x441a20 PSHUFD $0x44,%XMM11,%XMM11 |
(437) 0x441a26 MOVDQA %XMM11,%XMM12 |
(437) 0x441a2b PADDQ %XMM2,%XMM12 |
(437) 0x441a30 PADDQ %XMM3,%XMM11 |
(437) 0x441a35 MOVDQA %XMM9,%XMM13 |
(437) 0x441a3a PMULUDQ %XMM11,%XMM13 |
(437) 0x441a3f MOVDQA %XMM8,%XMM14 |
(437) 0x441a44 PMULUDQ %XMM11,%XMM14 |
(437) 0x441a49 PSRLQ $0x20,%XMM11 |
(437) 0x441a4f PMULUDQ %XMM8,%XMM11 |
(437) 0x441a54 PADDQ %XMM13,%XMM11 |
(437) 0x441a59 MOVDQA %XMM9,%XMM13 |
(437) 0x441a5e PMULUDQ %XMM12,%XMM13 |
(437) 0x441a63 MOVDQA %XMM8,%XMM15 |
(437) 0x441a68 PMULUDQ %XMM12,%XMM15 |
(437) 0x441a6d PSRLQ $0x20,%XMM12 |
(437) 0x441a73 PMULUDQ %XMM8,%XMM12 |
(437) 0x441a78 PADDQ %XMM13,%XMM12 |
(437) 0x441a7d MOVSD (%R9),%XMM13 |
(437) 0x441a82 PSLLQ $0x20,%XMM12 |
(437) 0x441a88 PADDQ %XMM7,%XMM15 |
(437) 0x441a8d PADDQ %XMM12,%XMM15 |
(437) 0x441a92 MOVSD (%RSI),%XMM12 |
(437) 0x441a97 PSLLQ $0x20,%XMM11 |
(437) 0x441a9d PADDQ %XMM7,%XMM14 |
(437) 0x441aa2 PADDQ %XMM11,%XMM14 |
(437) 0x441aa7 MOVSD (%R11),%XMM11 |
(437) 0x441aac MOVQ %XMM14,%RSI |
(437) 0x441ab1 MOVSD %XMM10,(%RSI) |
(437) 0x441ab6 PSHUFD $-0x12,%XMM14,%XMM10 |
(437) 0x441abc MOVQ %XMM10,%RSI |
(437) 0x441ac1 MOVSD %XMM13,(%RSI) |
(437) 0x441ac6 MOVQ %XMM15,%RSI |
(437) 0x441acb MOVSD %XMM12,(%RSI) |
(437) 0x441ad0 PSHUFD $-0x12,%XMM15,%XMM10 |
(437) 0x441ad6 MOVQ %XMM10,%RSI |
(437) 0x441adb MOVSD %XMM11,(%RSI) |
(437) 0x441ae0 ADD $0x4,%RDX |
(437) 0x441ae4 ADD $-0x4,%R14 |
(437) 0x441ae8 CMP %RBX,%RDX |
(437) 0x441aeb JB 441970 |
(435) 0x441af1 CMP %R8,%RBX |
(435) 0x441af4 MOV -0x3c(%RBP),%EDX |
(435) 0x441af7 MOV -0x80(%RBP),%R9 |
(435) 0x441afb MOV -0x78(%RBP),%R11 |
(435) 0x441aff MOV -0x70(%RBP),%R14 |
(435) 0x441b03 MOV -0x68(%RBP),%RSI |
(435) 0x441b07 JE 441880 |
(435) 0x441b0d JMP 441b12 |
0x441b0f NOP |
(435) 0x441b10 XOR %EBX,%EBX |
(435) 0x441b12 MOVSXD -0x2c(%RBP),%RAX |
(435) 0x441b16 INC %RAX |
(435) 0x441b19 SUB %RBX,%RSI |
(435) 0x441b1c IMUL %RCX,%RSI |
(435) 0x441b20 MOV -0x88(%RBP),%R10 |
(435) 0x441b27 IMUL %RAX,%R10 |
(435) 0x441b2b ADD %R10,%R15 |
(435) 0x441b2e ADD %RSI,%R15 |
(435) 0x441b31 NEG %RCX |
(435) 0x441b34 SUB %RBX,%R8 |
(435) 0x441b37 ADD %RBX,%R14 |
(435) 0x441b3a LEA (%R11,%RDI,1),%EAX |
(435) 0x441b3e IMUL %R11D,%EAX |
(435) 0x441b42 CLTQ |
(435) 0x441b44 ADD %R14,%RAX |
(435) 0x441b47 IMUL %R13,%RAX |
(435) 0x441b4b ADD %RAX,%R12 |
(435) 0x441b4e XCHG %AX,%AX |
(436) 0x441b50 MOVQ (%R12),%XMM4 |
(436) 0x441b56 MOVQ %XMM4,(%R15) |
(436) 0x441b5b ADD %RCX,%R15 |
(436) 0x441b5e ADD %R13,%R12 |
(436) 0x441b61 DEC %R8 |
(436) 0x441b64 JNE 441b50 |
(435) 0x441b66 JMP 441880 |
0x441b6b NOPL (%RAX,%RAX,1) |
Path / |
Source file and lines | pack_kernel.f90:108-116 |
Module | exec |
nb instructions | 63 |
nb uops | 62 |
loop length | 214 |
used x86 registers | 14 |
used mmx registers | 0 |
used xmm registers | 4 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 10 |
micro-operation queue | 10.33 cycles |
front end | 10.33 cycles |
Metric | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 4.75 | 4.75 | 4.75 | 4.75 | 3.00 | 7.33 | 7.33 | 7.33 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 |
cycles | 4.75 | 4.75 | 4.75 | 4.75 | 3.00 | 7.33 | 7.33 | 7.33 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 |
Cycles executing div or sqrt instructions | NA |
Front-end | 10.33 |
Dispatch | 7.33 |
Overall L1 | 10.33 |
all | 16% |
load | 57% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
all | 11% |
load | 16% |
store | 8% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 11% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 6% |
Instruction | Nb FU | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
SUB $0x68,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %R8,-0x50(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %RCX,-0x60(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV 0x28(%RBP),%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOVL $0,-0x44(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
TEST %EAX,%EAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JS 441833 <pack_kernel_module_mp_clover_unpack_message_left_.DIR.OMP.PARALLEL.LOOP.2.split99+0x83> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV %RDX,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV (%RDI),%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOVL $0,-0x34(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %EAX,-0x30(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOVL $0x1,-0x40(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
SUB $0x8,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA -0x40(%RBP),%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA -0x44(%RBP),%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA -0x34(%RBP),%R8 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA -0x30(%RBP),%R9 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV $0x573540,%EDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %ESI,-0x38(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV $0x22,%EDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
CALL 404670 <__kmpc_for_static_init_4@plt> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
ADD $0x20,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV -0x34(%RBP),%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV -0x30(%RBP),%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
SUB %ECX,%EDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JAE 441842 <pack_kernel_module_mp_clover_unpack_message_left_.DIR.OMP.PARALLEL.LOOP.2.split99+0x92> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV $0x573560,%EDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV -0x38(%RBP),%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
CALL 404230 <__kmpc_for_static_fini@plt> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
ADD $0x68,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
POP %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
RET | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV -0x50(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV (%RAX),%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
LEA (%RCX,%R12,1),%ESI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA (%RCX,%R12,1),%EDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
DEC %EDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
XOR %R9D,%R9D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOVDQA 0xebe93(%RIP),%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
MOVDQA 0xebdeb(%RIP),%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
MOVDQA 0xebe93(%RIP),%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
MOVDQA 0xebe9b(%RIP),%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
MOV %RSI,-0x58(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %EDX,-0x3c(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
JMP 441896 <pack_kernel_module_mp_clover_unpack_message_left_.DIR.OMP.PARALLEL.LOOP.2.split99+0xe6> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
XCHG %AX,%AX | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOP | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPL (%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
Source file and lines | pack_kernel.f90:108-116 |
Module | exec |
nb instructions | 63 |
nb uops | 62 |
loop length | 214 |
used x86 registers | 14 |
used mmx registers | 0 |
used xmm registers | 4 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 10 |
micro-operation queue | 10.33 cycles |
front end | 10.33 cycles |
Metric | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 4.75 | 4.75 | 4.75 | 4.75 | 3.00 | 7.33 | 7.33 | 7.33 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 |
cycles | 4.75 | 4.75 | 4.75 | 4.75 | 3.00 | 7.33 | 7.33 | 7.33 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 |
Cycles executing div or sqrt instructions | NA |
Front-end | 10.33 |
Dispatch | 7.33 |
Overall L1 | 10.33 |
all | 16% |
load | 57% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
all | 11% |
load | 16% |
store | 8% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 11% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 6% |
Instruction | Nb FU | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
SUB $0x68,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %R8,-0x50(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %RCX,-0x60(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV 0x28(%RBP),%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOVL $0,-0x44(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
TEST %EAX,%EAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JS 441833 <pack_kernel_module_mp_clover_unpack_message_left_.DIR.OMP.PARALLEL.LOOP.2.split99+0x83> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV %RDX,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV (%RDI),%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOVL $0,-0x34(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %EAX,-0x30(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOVL $0x1,-0x40(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
SUB $0x8,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA -0x40(%RBP),%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA -0x44(%RBP),%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA -0x34(%RBP),%R8 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA -0x30(%RBP),%R9 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV $0x573540,%EDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %ESI,-0x38(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV $0x22,%EDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
CALL 404670 <__kmpc_for_static_init_4@plt> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
ADD $0x20,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV -0x34(%RBP),%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV -0x30(%RBP),%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
SUB %ECX,%EDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JAE 441842 <pack_kernel_module_mp_clover_unpack_message_left_.DIR.OMP.PARALLEL.LOOP.2.split99+0x92> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV $0x573560,%EDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV -0x38(%RBP),%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
CALL 404230 <__kmpc_for_static_fini@plt> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
ADD $0x68,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
POP %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
RET | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV -0x50(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV (%RAX),%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
LEA (%RCX,%R12,1),%ESI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA (%RCX,%R12,1),%EDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
DEC %EDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
XOR %R9D,%R9D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOVDQA 0xebe93(%RIP),%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
MOVDQA 0xebdeb(%RIP),%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
MOVDQA 0xebe93(%RIP),%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
MOVDQA 0xebe9b(%RIP),%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
MOV %RSI,-0x58(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %EDX,-0x3c(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
JMP 441896 <pack_kernel_module_mp_clover_unpack_message_left_.DIR.OMP.PARALLEL.LOOP.2.split99+0xe6> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
XCHG %AX,%AX | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOP | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPL (%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼clover_unpack_message_left_.DIR.OMP.PARALLEL.LOOP.2.split99– | 0.01 | 0 |
▼Loop 435 - pack_kernel.f90:109-113 - exec– | 0 | 0 |
○Loop 436 - pack_kernel.f90:111-113 - exec | 0 | 0.01 |
○Loop 437 - pack_kernel.f90:111-113 - exec | 0 | 0 |