| Loop Id: 174 | Module: exec | Source: build_field.cpp:101-113 [...] | Coverage: 0.11% |
|---|
| Loop Id: 174 | Module: exec | Source: build_field.cpp:101-113 [...] | Coverage: 0.11% |
|---|
0x23cde0 VEXTRACTI32X4 $0x3,%ZMM0,%XMM17 |
0x23cde7 VPEXTRQ $0x1,%XMM13,%RSI |
0x23cded MOV %RDX,%R13 |
0x23cdf0 MOV %RDI,%R8 |
0x23cdf3 VMOVQ %XMM13,%RDI |
0x23cdf8 MOV %R11,%R10 |
0x23cdfb VPEXTRQ $0x1,%XMM14,%R11 |
0x23ce01 VEXTRACTI32X4 $0x1,%YMM0,%XMM21 |
0x23ce08 VEXTRACTI32X4 $0x1,%YMM1,%XMM22 |
0x23ce0f KXNORW %K0,%K0,%K1 |
0x23ce13 VPEXTRQ $0x1,%XMM17,%RAX |
0x23ce1a CQTO |
0x23ce1c IDIV %RSI |
0x23ce1f MOV %RAX,%RSI |
0x23ce22 VMOVQ %XMM17,%RAX |
0x23ce28 VEXTRACTI32X4 $0x2,%ZMM0,%XMM17 |
0x23ce2f VMOVQ %RSI,%XMM18 |
0x23ce35 VMOVQ %XMM14,%RSI |
0x23ce3a CQTO |
0x23ce3c IDIV %RDI |
0x23ce3f MOV %RAX,%RDI |
0x23ce42 VPEXTRQ $0x1,%XMM17,%RAX |
0x23ce49 VMOVQ %RDI,%XMM19 |
0x23ce4f MOV %R8,%RDI |
0x23ce52 CQTO |
0x23ce54 VPUNPCKLQDQ %XMM18,%XMM19,%XMM18 |
0x23ce5a IDIV %R11 |
0x23ce5d MOV %R10,%R11 |
0x23ce60 VMOVQ %RAX,%XMM20 |
0x23ce66 VMOVQ %XMM17,%RAX |
0x23ce6c CQTO |
0x23ce6e IDIV %RSI |
0x23ce71 VPEXTRQ $0x1,%XMM22,%RSI |
0x23ce78 VMOVQ %RAX,%XMM17 |
0x23ce7e VPEXTRQ $0x1,%XMM21,%RAX |
0x23ce85 CQTO |
0x23ce87 VPUNPCKLQDQ %XMM20,%XMM17,%XMM17 |
0x23ce8d IDIV %RSI |
0x23ce90 VMOVQ %XMM22,%RSI |
0x23ce96 VINSERTI32X4 $0x1,%XMM18,%YMM17,%YMM17 |
0x23ce9d VMOVQ %RAX,%XMM23 |
0x23cea3 VMOVQ %XMM21,%RAX |
0x23cea9 CQTO |
0x23ceab IDIV %RSI |
0x23ceae VPEXTRQ $0x1,%XMM1,%RSI |
0x23ceb4 VMOVQ %RAX,%XMM21 |
0x23ceba VPEXTRQ $0x1,%XMM0,%RAX |
0x23cec0 CQTO |
0x23cec2 VPUNPCKLQDQ %XMM23,%XMM21,%XMM18 |
0x23cec8 IDIV %RSI |
0x23cecb VMOVQ %XMM1,%RSI |
0x23ced0 VMOVQ %RAX,%XMM22 |
0x23ced6 VMOVQ %XMM0,%RAX |
0x23cedb CQTO |
0x23cedd IDIV %RSI |
0x23cee0 ADD $-0x8,%R14 |
0x23cee4 MOV %R13,%RDX |
0x23cee7 VMOVQ %RAX,%XMM24 |
0x23ceed VPUNPCKLQDQ %XMM22,%XMM24,%XMM19 |
0x23cef3 VINSERTI32X4 $0x1,%XMM18,%YMM19,%YMM18 |
0x23cefa VINSERTI64X4 $0x1,%YMM17,%ZMM18,%ZMM17 |
0x23cf01 VPMULLQ %ZMM1,%ZMM17,%ZMM18 |
0x23cf07 VPSLLQ $0x20,%ZMM17,%ZMM17 |
0x23cf0e VPSRAQ $0x20,%ZMM17,%ZMM17 |
0x23cf15 VPMULLQ %ZMM2,%ZMM17,%ZMM19 |
0x23cf1b VPMULLQ %ZMM3,%ZMM17,%ZMM21 |
0x23cf21 VPMULLQ %ZMM5,%ZMM17,%ZMM20 |
0x23cf27 VPSUBQ %ZMM18,%ZMM0,%ZMM18 |
0x23cf2d VPADDQ %ZMM16,%ZMM0,%ZMM0 |
0x23cf33 VPSLLQ $0x20,%ZMM18,%ZMM18 |
0x23cf3a VPSRAQ $0x20,%ZMM18,%ZMM18 |
0x23cf41 VPADDQ %ZMM19,%ZMM18,%ZMM19 |
0x23cf47 VSCATTERQPD %ZMM15,(%R8,%ZMM19,8){%K1} [8] |
0x23cf4e VPADDQ %ZMM21,%ZMM18,%ZMM19 |
0x23cf54 KXNORW %K0,%K0,%K1 |
0x23cf58 VPMULLQ %ZMM6,%ZMM17,%ZMM21 |
0x23cf5e VSCATTERQPD %ZMM15,(%R10,%ZMM19,8){%K1} [4] |
0x23cf65 VPMULLQ %ZMM4,%ZMM17,%ZMM19 |
0x23cf6b KXNORW %K0,%K0,%K1 |
0x23cf6f MOV -0x40(%RBP),%RAX [11] |
0x23cf73 VPADDQ %ZMM19,%ZMM18,%ZMM19 |
0x23cf79 VSCATTERQPD %ZMM15,(%RAX,%ZMM19,8){%K1} [3] |
0x23cf80 VPADDQ %ZMM20,%ZMM18,%ZMM19 |
0x23cf86 VPMULLQ %ZMM7,%ZMM17,%ZMM20 |
0x23cf8c KXNORW %K0,%K0,%K1 |
0x23cf90 VSCATTERQPD %ZMM15,(%R9,%ZMM19,8){%K1} [5] |
0x23cf97 VPADDQ %ZMM21,%ZMM18,%ZMM19 |
0x23cf9d KXNORW %K0,%K0,%K1 |
0x23cfa1 VSCATTERQPD %ZMM15,(%R12,%ZMM19,8){%K1} [10] |
0x23cfa8 KXNORW %K0,%K0,%K1 |
0x23cfac VPADDQ %ZMM20,%ZMM18,%ZMM19 |
0x23cfb2 VSCATTERQPD %ZMM15,(%RBX,%ZMM19,8){%K1} [2] |
0x23cfb9 VPMULLQ %ZMM8,%ZMM17,%ZMM19 |
0x23cfbf KXNORW %K0,%K0,%K1 |
0x23cfc3 MOV -0xa0(%RBP),%RAX [11] |
0x23cfca VPADDQ %ZMM19,%ZMM18,%ZMM19 |
0x23cfd0 VSCATTERQPD %ZMM15,(%RAX,%ZMM19,8){%K1} [7] |
0x23cfd7 VPMULLQ %ZMM9,%ZMM17,%ZMM19 |
0x23cfdd KXNORW %K0,%K0,%K1 |
0x23cfe1 MOV -0x98(%RBP),%RAX [11] |
0x23cfe8 VPADDQ %ZMM19,%ZMM18,%ZMM19 |
0x23cfee VSCATTERQPD %ZMM15,(%RAX,%ZMM19,8){%K1} [9] |
0x23cff5 VPMULLQ %ZMM10,%ZMM17,%ZMM19 |
0x23cffb KXNORW %K0,%K0,%K1 |
0x23cfff MOV -0x90(%RBP),%RAX [11] |
0x23d006 VPADDQ %ZMM19,%ZMM18,%ZMM19 |
0x23d00c VSCATTERQPD %ZMM15,(%RAX,%ZMM19,8){%K1} [1] |
0x23d013 VPMULLQ %ZMM11,%ZMM17,%ZMM19 |
0x23d019 VPMULLQ %ZMM12,%ZMM17,%ZMM17 |
0x23d01f KXNORW %K0,%K0,%K1 |
0x23d023 MOV -0x38(%RBP),%RAX [11] |
0x23d027 VPADDQ %ZMM19,%ZMM18,%ZMM19 |
0x23d02d VPADDQ %ZMM17,%ZMM18,%ZMM17 |
0x23d033 VSCATTERQPD %ZMM15,(%RAX,%ZMM19,8){%K1} [12] |
0x23d03a KXNORW %K0,%K0,%K1 |
0x23d03e VSCATTERQPD %ZMM15,(%R13,%ZMM17,8){%K1} [6] |
0x23d046 JNE 23cde0 |
/beegfs/hackathon/users/eoseret/qaas_runs/170-854-8685/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/build_field.cpp: 101 - 113 |
-------------------------------------------------------------------------------- |
101: for (int j = (0); j < (yrange + 1); j++) { |
102: for (int i = (0); i < (xrange + 1); i++) { |
103: field.work_array1(i, j) = 0.0; |
104: field.work_array2(i, j) = 0.0; |
105: field.work_array3(i, j) = 0.0; |
106: field.work_array4(i, j) = 0.0; |
107: field.work_array5(i, j) = 0.0; |
108: field.work_array6(i, j) = 0.0; |
109: field.work_array7(i, j) = 0.0; |
110: field.xvel0(i, j) = 0.0; |
111: field.xvel1(i, j) = 0.0; |
112: field.yvel0(i, j) = 0.0; |
113: field.yvel1(i, j) = 0.0; |
/beegfs/hackathon/users/eoseret/qaas_runs/170-854-8685/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/context.h: 69 - 69 |
-------------------------------------------------------------------------------- |
69: T &operator()(size_t i, size_t j) const { return data[i + j * sizeX]; } |
| Path / |
| Metric | Value |
|---|---|
| CQA speedup if no scalar integer | 1.00 |
| CQA speedup if FP arith vectorized | 1.00 |
| CQA speedup if fully vectorized | 1.03 |
| CQA speedup if no inter-iteration dependency | NA |
| CQA speedup if next bottleneck killed | NA |
| Bottlenecks | micro-operation queue, P12, P13, |
| Function | .omp_outlined.#0x23cb50 |
| Source | build_field.cpp:101-113,context.h:69-69 |
| Source loop unroll info | unrolled by 8 |
| Source loop unroll confidence level | max |
| Unroll/vectorization loop type | main |
| Unroll factor | 8 |
| CQA cycles | 107.00 |
| CQA cycles if no scalar integer | 107.00 |
| CQA cycles if FP arith vectorized | 107.00 |
| CQA cycles if fully vectorized | 103.50 |
| Front-end cycles | 107.00 |
| DIV/SQRT cycles | 16.00 |
| P0 cycles | 0.50 |
| P1 cycles | 8.00 |
| P2 cycles | 0.50 |
| P3 cycles | 1.00 |
| P4 cycles | 1.67 |
| P5 cycles | 1.67 |
| P6 cycles | 1.67 |
| P7 cycles | 26.50 |
| P8 cycles | 40.17 |
| P9 cycles | 40.17 |
| P10 cycles | 40.17 |
| P11 cycles | 107.00 |
| P12 cycles | 107.00 |
| P13 cycles | 56.00 - 96.00 |
| Inter-iter dependencies cycles | 1 |
| FE+BE cycles (UFS) | NA |
| Stall cycles (UFS) | NA |
| Nb insns | 117.00 |
| Nb uops | 642.00 |
| Nb loads | 13.00 |
| Nb stores | 11.00 |
| Nb stack references | 5.00 |
| FLOP/cycle | 0.00 |
| Nb FLOP add-sub | 0.00 |
| Nb FLOP mul | 0.00 |
| Nb FLOP fma | 0.00 |
| Nb FLOP div | 0.00 |
| Nb FLOP rcp | 0.00 |
| Nb FLOP sqrt | 0.00 |
| Nb FLOP rsqrt | 0.00 |
| Bytes/cycle | 6.95 |
| Bytes prefetched | 0.00 |
| Bytes loaded | 40.00 |
| Bytes stored | 704.00 |
| Stride 0 | 1.00 |
| Stride 1 | 0.00 |
| Stride n | 0.00 |
| Stride unknown | 0.00 |
| Stride indirect | 7.00 |
| Vectorization ratio all | 48.96 |
| Vectorization ratio load | NA |
| Vectorization ratio store | 100.00 |
| Vectorization ratio mul | 100.00 |
| Vectorization ratio add_sub | 100.00 |
| Vectorization ratio fma | NA |
| Vectorization ratio div_sqrt | 0.00 |
| Vectorization ratio other | 21.15 |
| Vector-efficiency ratio all | 50.13 |
| Vector-efficiency ratio load | NA |
| Vector-efficiency ratio store | 100.00 |
| Vector-efficiency ratio mul | 100.00 |
| Vector-efficiency ratio add_sub | 100.00 |
| Vector-efficiency ratio fma | NA |
| Vector-efficiency ratio div_sqrt | 12.50 |
| Vector-efficiency ratio other | 21.39 |
| Metric | Value |
|---|---|
| CQA speedup if no scalar integer | 1.00 |
| CQA speedup if FP arith vectorized | 1.00 |
| CQA speedup if fully vectorized | 1.03 |
| CQA speedup if no inter-iteration dependency | NA |
| CQA speedup if next bottleneck killed | NA |
| Bottlenecks | micro-operation queue, P12, P13, |
| Function | .omp_outlined.#0x23cb50 |
| Source | build_field.cpp:101-113,context.h:69-69 |
| Source loop unroll info | unrolled by 8 |
| Source loop unroll confidence level | max |
| Unroll/vectorization loop type | main |
| Unroll factor | 8 |
| CQA cycles | 107.00 |
| CQA cycles if no scalar integer | 107.00 |
| CQA cycles if FP arith vectorized | 107.00 |
| CQA cycles if fully vectorized | 103.50 |
| Front-end cycles | 107.00 |
| DIV/SQRT cycles | 16.00 |
| P0 cycles | 0.50 |
| P1 cycles | 8.00 |
| P2 cycles | 0.50 |
| P3 cycles | 1.00 |
| P4 cycles | 1.67 |
| P5 cycles | 1.67 |
| P6 cycles | 1.67 |
| P7 cycles | 26.50 |
| P8 cycles | 40.17 |
| P9 cycles | 40.17 |
| P10 cycles | 40.17 |
| P11 cycles | 107.00 |
| P12 cycles | 107.00 |
| P13 cycles | 56.00 - 96.00 |
| Inter-iter dependencies cycles | 1 |
| FE+BE cycles (UFS) | NA |
| Stall cycles (UFS) | NA |
| Nb insns | 117.00 |
| Nb uops | 642.00 |
| Nb loads | 13.00 |
| Nb stores | 11.00 |
| Nb stack references | 5.00 |
| FLOP/cycle | 0.00 |
| Nb FLOP add-sub | 0.00 |
| Nb FLOP mul | 0.00 |
| Nb FLOP fma | 0.00 |
| Nb FLOP div | 0.00 |
| Nb FLOP rcp | 0.00 |
| Nb FLOP sqrt | 0.00 |
| Nb FLOP rsqrt | 0.00 |
| Bytes/cycle | 6.95 |
| Bytes prefetched | 0.00 |
| Bytes loaded | 40.00 |
| Bytes stored | 704.00 |
| Stride 0 | 1.00 |
| Stride 1 | 0.00 |
| Stride n | 0.00 |
| Stride unknown | 0.00 |
| Stride indirect | 7.00 |
| Vectorization ratio all | 48.96 |
| Vectorization ratio load | NA |
| Vectorization ratio store | 100.00 |
| Vectorization ratio mul | 100.00 |
| Vectorization ratio add_sub | 100.00 |
| Vectorization ratio fma | NA |
| Vectorization ratio div_sqrt | 0.00 |
| Vectorization ratio other | 21.15 |
| Vector-efficiency ratio all | 50.13 |
| Vector-efficiency ratio load | NA |
| Vector-efficiency ratio store | 100.00 |
| Vector-efficiency ratio mul | 100.00 |
| Vector-efficiency ratio add_sub | 100.00 |
| Vector-efficiency ratio fma | NA |
| Vector-efficiency ratio div_sqrt | 12.50 |
| Vector-efficiency ratio other | 21.39 |
| Path / |
| Function | .omp_outlined.#0x23cb50 |
| Source file and lines | build_field.cpp:101-113 |
| Module | exec |
| nb instructions | 117 |
| nb uops | 642 |
| loop length | 620 |
| used x86 registers | 13 |
| used mmx registers | 0 |
| used xmm registers | 12 |
| used ymm registers | 5 |
| used zmm registers | 20 |
| nb stack references | 5 |
| micro-operation queue | 107.00 cycles |
| front end | 107.00 cycles |
| | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| uops | 16.00 | 0.50 | 8.00 | 0.50 | 1.00 | 1.67 | 1.67 | 1.67 | 26.50 | 40.17 | 40.17 | 40.17 | 107.00 | 107.00 |
| cycles | 16.00 | 0.50 | 8.00 | 0.50 | 1.00 | 1.67 | 1.67 | 1.67 | 26.50 | 40.17 | 40.17 | 40.17 | 107.00 | 107.00 |
| Cycles executing div or sqrt instructions | 56.00-96.00 |
| Longest recurrence chain latency (RecMII) | 1.00 |
| Front-end | 107.00 |
| Dispatch | 107.00 |
| DIV/SQRT | 56.00-96.00 |
| Data deps. | 1.00 |
| Overall L1 | 107.00 |
| all | 42% |
| load | NA (no load vectorizable/vectorized instructions) |
| store | NA (no store vectorizable/vectorized instructions) |
| mul | 100% |
| add-sub | 100% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| other | 18% |
| all | 100% |
| load | NA (no load vectorizable/vectorized instructions) |
| store | 100% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | NA (no add-sub vectorizable/vectorized instructions) |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | NA (no other vectorizable/vectorized instructions) |
| all | 48% |
| load | NA (no load vectorizable/vectorized instructions) |
| store | 100% |
| mul | 100% |
| add-sub | 100% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | 0% |
| other | 21% |
| all | 43% |
| load | NA (no load vectorizable/vectorized instructions) |
| store | NA (no store vectorizable/vectorized instructions) |
| mul | 100% |
| add-sub | 100% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| other | 20% |
| all | 100% |
| load | NA (no load vectorizable/vectorized instructions) |
| store | 100% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | NA (no add-sub vectorizable/vectorized instructions) |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | NA (no other vectorizable/vectorized instructions) |
| all | 50% |
| load | NA (no load vectorizable/vectorized instructions) |
| store | 100% |
| mul | 100% |
| add-sub | 100% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | 12% |
| other | 21% |
| Instruction | Nb FU | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | Latency | Recip. throughput |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| VEXTRACTI32X4 $0x3,%ZMM0,%XMM17 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 4 | 1 |
| VPEXTRQ $0x1,%XMM13,%RSI | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 6 | 1 |
| MOV %RDX,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
| MOV %RDI,%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
| VMOVQ %XMM13,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 |
| MOV %R11,%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
| VPEXTRQ $0x1,%XMM14,%R11 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 6 | 1 |
| VEXTRACTI32X4 $0x1,%YMM0,%XMM21 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 4 | 0.25 |
| VEXTRACTI32X4 $0x1,%YMM1,%XMM22 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 4 | 0.25 |
| KXNORW %K0,%K0,%K1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
| VPEXTRQ $0x1,%XMM17,%RAX | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 6 | 1 |
| CQTO | |||||||||||||||||
| IDIV %RSI | 2 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 9-19 | 7-12 |
| MOV %RAX,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
| VMOVQ %XMM17,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 |
| VEXTRACTI32X4 $0x2,%ZMM0,%XMM17 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 4 | 1 |
| VMOVQ %RSI,%XMM18 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
| VMOVQ %XMM14,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 |
| CQTO | |||||||||||||||||
| IDIV %RDI | 2 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 9-19 | 7-12 |
| MOV %RAX,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
| VPEXTRQ $0x1,%XMM17,%RAX | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 6 | 1 |
| VMOVQ %RDI,%XMM19 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
| MOV %R8,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
| CQTO | |||||||||||||||||
| VPUNPCKLQDQ %XMM18,%XMM19,%XMM18 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 |
| IDIV %R11 | 2 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 9-19 | 7-12 |
| MOV %R10,%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
| VMOVQ %RAX,%XMM20 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
| VMOVQ %XMM17,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 |
| CQTO | |||||||||||||||||
| IDIV %RSI | 2 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 9-19 | 7-12 |
| VPEXTRQ $0x1,%XMM22,%RSI | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 6 | 1 |
| VMOVQ %RAX,%XMM17 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
| VPEXTRQ $0x1,%XMM21,%RAX | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 6 | 1 |
| CQTO | |||||||||||||||||
| VPUNPCKLQDQ %XMM20,%XMM17,%XMM17 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 |
| IDIV %RSI | 2 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 9-19 | 7-12 |
| VMOVQ %XMM22,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 |
| VINSERTI32X4 $0x1,%XMM18,%YMM17,%YMM17 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 |
| VMOVQ %RAX,%XMM23 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
| VMOVQ %XMM21,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 |
| CQTO | |||||||||||||||||
| IDIV %RSI | 2 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 9-19 | 7-12 |
| VPEXTRQ $0x1,%XMM1,%RSI | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 6 | 1 |
| VMOVQ %RAX,%XMM21 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
| VPEXTRQ $0x1,%XMM0,%RAX | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 6 | 1 |
| CQTO | |||||||||||||||||
| VPUNPCKLQDQ %XMM23,%XMM21,%XMM18 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 |
| IDIV %RSI | 2 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 9-19 | 7-12 |
| VMOVQ %XMM1,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 |
| VMOVQ %RAX,%XMM22 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
| VMOVQ %XMM0,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 |
| CQTO | |||||||||||||||||
| IDIV %RSI | 2 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 9-19 | 7-12 |
| ADD $-0x8,%R14 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
| MOV %R13,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
| VMOVQ %RAX,%XMM24 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
| VPUNPCKLQDQ %XMM22,%XMM24,%XMM19 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 |
| VINSERTI32X4 $0x1,%XMM18,%YMM19,%YMM18 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 |
| VINSERTI64X4 $0x1,%YMM17,%ZMM18,%ZMM17 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 1 |
| VPMULLQ %ZMM1,%ZMM17,%ZMM18 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 |
| VPSLLQ $0x20,%ZMM17,%ZMM17 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 1 | 1 |
| VPSRAQ $0x20,%ZMM17,%ZMM17 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 1 | 1 |
| VPMULLQ %ZMM2,%ZMM17,%ZMM19 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 |
| VPMULLQ %ZMM3,%ZMM17,%ZMM21 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 |
| VPMULLQ %ZMM5,%ZMM17,%ZMM20 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 |
| VPSUBQ %ZMM18,%ZMM0,%ZMM18 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 |
| VPADDQ %ZMM16,%ZMM0,%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 |
| VPSLLQ $0x20,%ZMM18,%ZMM18 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 1 | 1 |
| VPSRAQ $0x20,%ZMM18,%ZMM18 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 1 | 1 |
| VPADDQ %ZMM19,%ZMM18,%ZMM19 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 |
| VSCATTERQPD %ZMM15,(%R8,%ZMM19,8){%K1} | 48 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 3.50 | 1.50 | 9 | 9 | 1-40 | 12.14 |
| VPADDQ %ZMM21,%ZMM18,%ZMM19 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 |
| KXNORW %K0,%K0,%K1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
| VPMULLQ %ZMM6,%ZMM17,%ZMM21 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 |
| VSCATTERQPD %ZMM15,(%R10,%ZMM19,8){%K1} | 48 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 3.50 | 1.50 | 9 | 9 | 1-40 | 12.14 |
| VPMULLQ %ZMM4,%ZMM17,%ZMM19 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 |
| KXNORW %K0,%K0,%K1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
| MOV -0x40(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
| VPADDQ %ZMM19,%ZMM18,%ZMM19 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 |
| VSCATTERQPD %ZMM15,(%RAX,%ZMM19,8){%K1} | 48 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 3.50 | 1.50 | 9 | 9 | 1-40 | 12.14 |
| VPADDQ %ZMM20,%ZMM18,%ZMM19 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 |
| VPMULLQ %ZMM7,%ZMM17,%ZMM20 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 |
| KXNORW %K0,%K0,%K1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
| VSCATTERQPD %ZMM15,(%R9,%ZMM19,8){%K1} | 48 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 3.50 | 1.50 | 9 | 9 | 1-40 | 12.14 |
| VPADDQ %ZMM21,%ZMM18,%ZMM19 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 |
| KXNORW %K0,%K0,%K1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
| VSCATTERQPD %ZMM15,(%R12,%ZMM19,8){%K1} | 48 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 3.50 | 1.50 | 9 | 9 | 1-40 | 12.14 |
| KXNORW %K0,%K0,%K1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
| VPADDQ %ZMM20,%ZMM18,%ZMM19 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 |
| VSCATTERQPD %ZMM15,(%RBX,%ZMM19,8){%K1} | 48 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 3.50 | 1.50 | 9 | 9 | 1-40 | 12.14 |
| VPMULLQ %ZMM8,%ZMM17,%ZMM19 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 |
| KXNORW %K0,%K0,%K1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
| MOV -0xa0(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
| VPADDQ %ZMM19,%ZMM18,%ZMM19 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 |
| VSCATTERQPD %ZMM15,(%RAX,%ZMM19,8){%K1} | 48 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 3.50 | 1.50 | 9 | 9 | 1-40 | 12.14 |
| VPMULLQ %ZMM9,%ZMM17,%ZMM19 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 |
| KXNORW %K0,%K0,%K1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
| MOV -0x98(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
| VPADDQ %ZMM19,%ZMM18,%ZMM19 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 |
| VSCATTERQPD %ZMM15,(%RAX,%ZMM19,8){%K1} | 48 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 3.50 | 1.50 | 9 | 9 | 1-40 | 12.14 |
| VPMULLQ %ZMM10,%ZMM17,%ZMM19 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 |
| KXNORW %K0,%K0,%K1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
| MOV -0x90(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
| VPADDQ %ZMM19,%ZMM18,%ZMM19 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 |
| VSCATTERQPD %ZMM15,(%RAX,%ZMM19,8){%K1} | 48 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 3.50 | 1.50 | 9 | 9 | 1-40 | 12.14 |
| VPMULLQ %ZMM11,%ZMM17,%ZMM19 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 |
| VPMULLQ %ZMM12,%ZMM17,%ZMM17 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 |
| KXNORW %K0,%K0,%K1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
| MOV -0x38(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
| VPADDQ %ZMM19,%ZMM18,%ZMM19 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 |
| VPADDQ %ZMM17,%ZMM18,%ZMM17 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 |
| VSCATTERQPD %ZMM15,(%RAX,%ZMM19,8){%K1} | 48 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 3.50 | 1.50 | 9 | 9 | 1-40 | 12.14 |
| KXNORW %K0,%K0,%K1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
| VSCATTERQPD %ZMM15,(%R13,%ZMM17,8){%K1} | 48 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 3.50 | 1.50 | 9 | 9 | 1-40 | 12.14 |
| JNE 23cde0 <.omp_outlined.+0x290> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
| Function | .omp_outlined.#0x23cb50 |
| Source file and lines | build_field.cpp:101-113 |
| Module | exec |
| nb instructions | 117 |
| nb uops | 642 |
| loop length | 620 |
| used x86 registers | 13 |
| used mmx registers | 0 |
| used xmm registers | 12 |
| used ymm registers | 5 |
| used zmm registers | 20 |
| nb stack references | 5 |
| micro-operation queue | 107.00 cycles |
| front end | 107.00 cycles |
| | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| uops | 16.00 | 0.50 | 8.00 | 0.50 | 1.00 | 1.67 | 1.67 | 1.67 | 26.50 | 40.17 | 40.17 | 40.17 | 107.00 | 107.00 |
| cycles | 16.00 | 0.50 | 8.00 | 0.50 | 1.00 | 1.67 | 1.67 | 1.67 | 26.50 | 40.17 | 40.17 | 40.17 | 107.00 | 107.00 |
| Cycles executing div or sqrt instructions | 56.00-96.00 |
| Longest recurrence chain latency (RecMII) | 1.00 |
| Front-end | 107.00 |
| Dispatch | 107.00 |
| DIV/SQRT | 56.00-96.00 |
| Data deps. | 1.00 |
| Overall L1 | 107.00 |
| all | 42% |
| load | NA (no load vectorizable/vectorized instructions) |
| store | NA (no store vectorizable/vectorized instructions) |
| mul | 100% |
| add-sub | 100% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| other | 18% |
| all | 100% |
| load | NA (no load vectorizable/vectorized instructions) |
| store | 100% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | NA (no add-sub vectorizable/vectorized instructions) |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | NA (no other vectorizable/vectorized instructions) |
| all | 48% |
| load | NA (no load vectorizable/vectorized instructions) |
| store | 100% |
| mul | 100% |
| add-sub | 100% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | 0% |
| other | 21% |
| all | 43% |
| load | NA (no load vectorizable/vectorized instructions) |
| store | NA (no store vectorizable/vectorized instructions) |
| mul | 100% |
| add-sub | 100% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| other | 20% |
| all | 100% |
| load | NA (no load vectorizable/vectorized instructions) |
| store | 100% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | NA (no add-sub vectorizable/vectorized instructions) |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | NA (no other vectorizable/vectorized instructions) |
| all | 50% |
| load | NA (no load vectorizable/vectorized instructions) |
| store | 100% |
| mul | 100% |
| add-sub | 100% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | 12% |
| other | 21% |
| Instruction | Nb FU | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | Latency | Recip. throughput |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| VEXTRACTI32X4 $0x3,%ZMM0,%XMM17 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 4 | 1 |
| VPEXTRQ $0x1,%XMM13,%RSI | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 6 | 1 |
| MOV %RDX,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
| MOV %RDI,%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
| VMOVQ %XMM13,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 |
| MOV %R11,%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
| VPEXTRQ $0x1,%XMM14,%R11 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 6 | 1 |
| VEXTRACTI32X4 $0x1,%YMM0,%XMM21 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 4 | 0.25 |
| VEXTRACTI32X4 $0x1,%YMM1,%XMM22 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 4 | 0.25 |
| KXNORW %K0,%K0,%K1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
| VPEXTRQ $0x1,%XMM17,%RAX | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 6 | 1 |
| CQTO | |||||||||||||||||
| IDIV %RSI | 2 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 9-19 | 7-12 |
| MOV %RAX,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
| VMOVQ %XMM17,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 |
| VEXTRACTI32X4 $0x2,%ZMM0,%XMM17 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 4 | 1 |
| VMOVQ %RSI,%XMM18 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
| VMOVQ %XMM14,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 |
| CQTO | |||||||||||||||||
| IDIV %RDI | 2 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 9-19 | 7-12 |
| MOV %RAX,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
| VPEXTRQ $0x1,%XMM17,%RAX | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 6 | 1 |
| VMOVQ %RDI,%XMM19 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
| MOV %R8,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
| CQTO | |||||||||||||||||
| VPUNPCKLQDQ %XMM18,%XMM19,%XMM18 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 |
| IDIV %R11 | 2 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 9-19 | 7-12 |
| MOV %R10,%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
| VMOVQ %RAX,%XMM20 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
| VMOVQ %XMM17,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 |
| CQTO | |||||||||||||||||
| IDIV %RSI | 2 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 9-19 | 7-12 |
| VPEXTRQ $0x1,%XMM22,%RSI | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 6 | 1 |
| VMOVQ %RAX,%XMM17 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
| VPEXTRQ $0x1,%XMM21,%RAX | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 6 | 1 |
| CQTO | |||||||||||||||||
| VPUNPCKLQDQ %XMM20,%XMM17,%XMM17 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 |
| IDIV %RSI | 2 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 9-19 | 7-12 |
| VMOVQ %XMM22,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 |
| VINSERTI32X4 $0x1,%XMM18,%YMM17,%YMM17 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 |
| VMOVQ %RAX,%XMM23 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
| VMOVQ %XMM21,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 |
| CQTO | |||||||||||||||||
| IDIV %RSI | 2 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 9-19 | 7-12 |
| VPEXTRQ $0x1,%XMM1,%RSI | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 6 | 1 |
| VMOVQ %RAX,%XMM21 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
| VPEXTRQ $0x1,%XMM0,%RAX | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 6 | 1 |
| CQTO | |||||||||||||||||
| VPUNPCKLQDQ %XMM23,%XMM21,%XMM18 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 |
| IDIV %RSI | 2 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 9-19 | 7-12 |
| VMOVQ %XMM1,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 |
| VMOVQ %RAX,%XMM22 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
| VMOVQ %XMM0,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 |
| CQTO | |||||||||||||||||
| IDIV %RSI | 2 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 9-19 | 7-12 |
| ADD $-0x8,%R14 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
| MOV %R13,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
| VMOVQ %RAX,%XMM24 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
| VPUNPCKLQDQ %XMM22,%XMM24,%XMM19 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 |
| VINSERTI32X4 $0x1,%XMM18,%YMM19,%YMM18 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 |
| VINSERTI64X4 $0x1,%YMM17,%ZMM18,%ZMM17 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 1 |
| VPMULLQ %ZMM1,%ZMM17,%ZMM18 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 |
| VPSLLQ $0x20,%ZMM17,%ZMM17 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 1 | 1 |
| VPSRAQ $0x20,%ZMM17,%ZMM17 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 1 | 1 |
| VPMULLQ %ZMM2,%ZMM17,%ZMM19 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 |
| VPMULLQ %ZMM3,%ZMM17,%ZMM21 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 |
| VPMULLQ %ZMM5,%ZMM17,%ZMM20 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 |
| VPSUBQ %ZMM18,%ZMM0,%ZMM18 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 |
| VPADDQ %ZMM16,%ZMM0,%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 |
| VPSLLQ $0x20,%ZMM18,%ZMM18 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 1 | 1 |
| VPSRAQ $0x20,%ZMM18,%ZMM18 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 1 | 1 |
| VPADDQ %ZMM19,%ZMM18,%ZMM19 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 |
| VSCATTERQPD %ZMM15,(%R8,%ZMM19,8){%K1} | 48 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 3.50 | 1.50 | 9 | 9 | 1-40 | 12.14 |
| VPADDQ %ZMM21,%ZMM18,%ZMM19 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 |
| KXNORW %K0,%K0,%K1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
| VPMULLQ %ZMM6,%ZMM17,%ZMM21 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 |
| VSCATTERQPD %ZMM15,(%R10,%ZMM19,8){%K1} | 48 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 3.50 | 1.50 | 9 | 9 | 1-40 | 12.14 |
| VPMULLQ %ZMM4,%ZMM17,%ZMM19 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 |
| KXNORW %K0,%K0,%K1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
| MOV -0x40(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
| VPADDQ %ZMM19,%ZMM18,%ZMM19 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 |
| VSCATTERQPD %ZMM15,(%RAX,%ZMM19,8){%K1} | 48 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 3.50 | 1.50 | 9 | 9 | 1-40 | 12.14 |
| VPADDQ %ZMM20,%ZMM18,%ZMM19 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 |
| VPMULLQ %ZMM7,%ZMM17,%ZMM20 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 |
| KXNORW %K0,%K0,%K1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
| VSCATTERQPD %ZMM15,(%R9,%ZMM19,8){%K1} | 48 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 3.50 | 1.50 | 9 | 9 | 1-40 | 12.14 |
| VPADDQ %ZMM21,%ZMM18,%ZMM19 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 |
| KXNORW %K0,%K0,%K1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
| VSCATTERQPD %ZMM15,(%R12,%ZMM19,8){%K1} | 48 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 3.50 | 1.50 | 9 | 9 | 1-40 | 12.14 |
| KXNORW %K0,%K0,%K1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
| VPADDQ %ZMM20,%ZMM18,%ZMM19 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 |
| VSCATTERQPD %ZMM15,(%RBX,%ZMM19,8){%K1} | 48 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 3.50 | 1.50 | 9 | 9 | 1-40 | 12.14 |
| VPMULLQ %ZMM8,%ZMM17,%ZMM19 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 |
| KXNORW %K0,%K0,%K1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
| MOV -0xa0(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
| VPADDQ %ZMM19,%ZMM18,%ZMM19 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 |
| VSCATTERQPD %ZMM15,(%RAX,%ZMM19,8){%K1} | 48 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 3.50 | 1.50 | 9 | 9 | 1-40 | 12.14 |
| VPMULLQ %ZMM9,%ZMM17,%ZMM19 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 |
| KXNORW %K0,%K0,%K1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
| MOV -0x98(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
| VPADDQ %ZMM19,%ZMM18,%ZMM19 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 |
| VSCATTERQPD %ZMM15,(%RAX,%ZMM19,8){%K1} | 48 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 3.50 | 1.50 | 9 | 9 | 1-40 | 12.14 |
| VPMULLQ %ZMM10,%ZMM17,%ZMM19 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 |
| KXNORW %K0,%K0,%K1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
| MOV -0x90(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
| VPADDQ %ZMM19,%ZMM18,%ZMM19 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 |
| VSCATTERQPD %ZMM15,(%RAX,%ZMM19,8){%K1} | 48 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 3.50 | 1.50 | 9 | 9 | 1-40 | 12.14 |
| VPMULLQ %ZMM11,%ZMM17,%ZMM19 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 |
| VPMULLQ %ZMM12,%ZMM17,%ZMM17 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 |
| KXNORW %K0,%K0,%K1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
| MOV -0x38(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
| VPADDQ %ZMM19,%ZMM18,%ZMM19 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 |
| VPADDQ %ZMM17,%ZMM18,%ZMM17 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 |
| VSCATTERQPD %ZMM15,(%RAX,%ZMM19,8){%K1} | 48 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 3.50 | 1.50 | 9 | 9 | 1-40 | 12.14 |
| KXNORW %K0,%K0,%K1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
| VSCATTERQPD %ZMM15,(%R13,%ZMM17,8){%K1} | 48 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 3.50 | 1.50 | 9 | 9 | 1-40 | 12.14 |
| JNE 23cde0 <.omp_outlined.+0x290> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
