Function: hypre_qsort0 | Module: exec | Source: hypre_qsort.c:31-187 [...] | Coverage: 0.01% |
---|
Function: hypre_qsort0 | Module: exec | Source: hypre_qsort.c:31-187 [...] | Coverage: 0.01% |
---|
/scratch_na/users/xoserete/qaas_runs/171-415-3661/intel/AMG/build/AMG/AMG/utilities/hypre_qsort.c: 31 - 187 |
-------------------------------------------------------------------------------- |
31: temp = v[i]; |
32: v[i] = v[j]; |
33: v[j] = temp; |
[...] |
175: if (left >= right) |
176: return; |
177: hypre_swap( v, left, (left+right)/2); |
178: last = left; |
179: for (i = left+1; i <= right; i++) |
180: if (v[i] < v[left]) |
181: { |
182: hypre_swap(v, ++last, i); |
183: } |
184: hypre_swap(v, left, last); |
185: hypre_qsort0(v, left, last-1); |
186: hypre_qsort0(v, last+1, right); |
187: } |
0x4d58e0 CMP %RDX,%RSI |
0x4d58e3 JGE 4d5a5c |
0x4d58e9 PUSH %RBP |
0x4d58ea MOV %RSP,%RBP |
0x4d58ed PUSH %R15 |
0x4d58ef PUSH %R14 |
0x4d58f1 PUSH %R12 |
0x4d58f3 PUSH %RBX |
0x4d58f4 MOV %RDX,%RBX |
0x4d58f7 MOV %RDI,%R14 |
0x4d58fa LEA 0x20(%RDI),%R15 |
0x4d58fe MOV %RSI,%R12 |
0x4d5901 JMP 4d593c |
0x4d5903 NOPW %CS:(%RAX,%RAX,1) |
(4166) 0x4d5910 MOV (%R14,%RSI,8),%RAX |
(4166) 0x4d5914 MOV (%R14,%R12,8),%RCX |
(4166) 0x4d5918 MOV %RCX,(%R14,%RSI,8) |
(4166) 0x4d591c MOV %RAX,(%R14,%R12,8) |
(4166) 0x4d5920 LEA -0x1(%R12),%RDX |
(4166) 0x4d5925 MOV %R14,%RDI |
(4166) 0x4d5928 CALL 4d58e0 <hypre_qsort0> |
(4166) 0x4d592d INC %R12 |
(4166) 0x4d5930 MOV %R12,%RSI |
(4166) 0x4d5933 CMP %RBX,%R12 |
(4166) 0x4d5936 JGE 4d5a54 |
(4166) 0x4d593c LEA (%RSI,%RBX,1),%RAX |
(4166) 0x4d5940 MOV %RAX,%RCX |
(4166) 0x4d5943 SHR $0x3f,%RCX |
(4166) 0x4d5947 ADD %RAX,%RCX |
(4166) 0x4d594a MOV (%R14,%RSI,8),%RAX |
(4166) 0x4d594e AND $-0x2,%RCX |
(4166) 0x4d5952 MOV (%R14,%RCX,4),%RDX |
(4166) 0x4d5956 MOV %RDX,(%R14,%RSI,8) |
(4166) 0x4d595a MOV %RAX,(%R14,%RCX,4) |
(4166) 0x4d595e LEA 0x1(%RSI),%RAX |
(4166) 0x4d5962 CMP %RAX,%RBX |
(4166) 0x4d5965 CMOVG %RBX,%RAX |
(4166) 0x4d5969 MOV %RAX,%RCX |
(4166) 0x4d596c SUB %RSI,%RCX |
(4166) 0x4d596f CMP $0x4,%RCX |
(4166) 0x4d5973 JAE 4d59c0 |
(4166) 0x4d5975 MOV %RCX,%RDX |
(4166) 0x4d5978 AND $-0x4,%RDX |
(4166) 0x4d597c CMP %RCX,%RDX |
(4166) 0x4d597f JE 4d5910 |
(4166) 0x4d5981 ADD %RSI,%RDX |
(4166) 0x4d5984 JMP 4d599c |
0x4d5986 NOPW %CS:(%RAX,%RAX,1) |
(4167) 0x4d5990 INC %RDX |
(4167) 0x4d5993 CMP %RDX,%RAX |
(4167) 0x4d5996 JE 4d5910 |
(4167) 0x4d599c MOV 0x8(%R14,%RDX,8),%RCX |
(4167) 0x4d59a1 CMP (%R14,%RSI,8),%RCX |
(4167) 0x4d59a5 JGE 4d5990 |
(4167) 0x4d59a7 MOV 0x8(%R14,%R12,8),%RDI |
(4167) 0x4d59ac MOV %RCX,0x8(%R14,%R12,8) |
(4167) 0x4d59b1 INC %R12 |
(4167) 0x4d59b4 MOV %RDI,0x8(%R14,%RDX,8) |
(4167) 0x4d59b9 JMP 4d5990 |
0x4d59bb NOPL (%RAX,%RAX,1) |
(4166) 0x4d59c0 MOV %RCX,%RDX |
(4166) 0x4d59c3 SHR $0x2,%RDX |
(4166) 0x4d59c7 LEA (%R15,%RSI,8),%RDI |
(4166) 0x4d59cb MOV %RSI,%R12 |
(4166) 0x4d59ce JMP 4d59d9 |
(4168) 0x4d59d0 ADD $0x20,%RDI |
(4168) 0x4d59d4 DEC %RDX |
(4168) 0x4d59d7 JE 4d5975 |
(4168) 0x4d59d9 MOV -0x18(%RDI),%R9 |
(4168) 0x4d59dd MOV (%R14,%RSI,8),%R8 |
(4168) 0x4d59e1 CMP %R8,%R9 |
(4168) 0x4d59e4 JGE 4d59fb |
(4168) 0x4d59e6 MOV 0x8(%R14,%R12,8),%R8 |
(4168) 0x4d59eb MOV %R9,0x8(%R14,%R12,8) |
(4168) 0x4d59f0 INC %R12 |
(4168) 0x4d59f3 MOV %R8,-0x18(%RDI) |
(4168) 0x4d59f7 MOV (%R14,%RSI,8),%R8 |
(4168) 0x4d59fb MOV -0x10(%RDI),%R9 |
(4168) 0x4d59ff CMP %R8,%R9 |
(4168) 0x4d5a02 JGE 4d5a19 |
(4168) 0x4d5a04 MOV 0x8(%R14,%R12,8),%R8 |
(4168) 0x4d5a09 MOV %R9,0x8(%R14,%R12,8) |
(4168) 0x4d5a0e INC %R12 |
(4168) 0x4d5a11 MOV %R8,-0x10(%RDI) |
(4168) 0x4d5a15 MOV (%R14,%RSI,8),%R8 |
(4168) 0x4d5a19 MOV -0x8(%RDI),%R9 |
(4168) 0x4d5a1d CMP %R8,%R9 |
(4168) 0x4d5a20 JGE 4d5a37 |
(4168) 0x4d5a22 MOV 0x8(%R14,%R12,8),%R8 |
(4168) 0x4d5a27 MOV %R9,0x8(%R14,%R12,8) |
(4168) 0x4d5a2c INC %R12 |
(4168) 0x4d5a2f MOV %R8,-0x8(%RDI) |
(4168) 0x4d5a33 MOV (%R14,%RSI,8),%R8 |
(4168) 0x4d5a37 MOV (%RDI),%R9 |
(4168) 0x4d5a3a CMP %R8,%R9 |
(4168) 0x4d5a3d JGE 4d59d0 |
(4168) 0x4d5a3f MOV 0x8(%R14,%R12,8),%R8 |
(4168) 0x4d5a44 MOV %R9,0x8(%R14,%R12,8) |
(4168) 0x4d5a49 INC %R12 |
(4168) 0x4d5a4c MOV %R8,(%RDI) |
(4168) 0x4d5a4f JMP 4d59d0 |
0x4d5a54 POP %RBX |
0x4d5a55 POP %R12 |
0x4d5a57 POP %R14 |
0x4d5a59 POP %R15 |
0x4d5a5b POP %RBP |
0x4d5a5c RET |
0x4d5a5d NOPL (%RAX) |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►16.68+ | hypre_qsort0 | hypre_qsort.c:186 | exec |
○ | hypre_qsort0 | hypre_qsort.c:186 | exec |
○ | hypre_qsort0 | hypre_qsort.c:186 | exec |
○ | hypre_qsort0 | hypre_qsort.c:186 | exec |
○ | hypre_qsort0 | hypre_qsort.c:186 | exec |
○ | hypre_qsort0 | hypre_qsort.c:186 | exec |
○ | hypre_IJMatrixAssembleParCSR | IJMatrix_parcsr.c:2834 | exec |
○ | BuildIJLaplacian27pt | amg.c:2267 | exec |
○ | main | amg.c:274 | exec |
○ | __libc_start_main | libc-2.28.so | |
►11.11+ | hypre_qsort0 | hypre_qsort.c:186 | exec |
○ | hypre_qsort0 | hypre_qsort.c:186 | exec |
○ | hypre_qsort0 | hypre_qsort.c:186 | exec |
○ | hypre_qsort0 | hypre_qsort.c:186 | exec |
○ | hypre_qsort0 | hypre_qsort.c:186 | exec |
○ | hypre_qsort0 | hypre_qsort.c:186 | exec |
○ | hypre_qsort0 | hypre_qsort.c:186 | exec |
○ | hypre_IJMatrixAssembleParCSR | IJMatrix_parcsr.c:2834 | exec |
○ | BuildIJLaplacian27pt | amg.c:2267 | exec |
○ | main | amg.c:274 | exec |
○ | __libc_start_main | libc-2.28.so | |
►11.11+ | hypre_qsort0 | hypre_qsort.c:186 | exec |
○ | hypre_IJMatrixAssembleParCSR | IJMatrix_parcsr.c:2834 | exec |
○ | BuildIJLaplacian27pt | amg.c:2267 | exec |
○ | main | amg.c:274 | exec |
○ | __libc_start_main | libc-2.28.so | |
►5.56+ | hypre_qsort0 | hypre_qsort.c:186 | exec |
○ | hypre_qsort0 | hypre_qsort.c:186 | exec |
○ | hypre_qsort0 | hypre_qsort.c:186 | exec |
○ | hypre_merge_sort.extracted | hypre_merge_sort.c:265 | exec |
○ | __kmp_invoke_microtask | libiomp5.so | |
○ | __kmp_invoke_task_func | libiomp5.so | |
►5.56+ | hypre_qsort0 | hypre_qsort.c:186 | exec |
○ | hypre_qsort0 | hypre_qsort.c:186 | exec |
○ | hypre_qsort0 | hypre_qsort.c:186 | exec |
○ | hypre_qsort0 | hypre_qsort.c:186 | exec |
○ | hypre_qsort0 | hypre_qsort.c:186 | exec |
○ | hypre_qsort0 | hypre_qsort.c:186 | exec |
○ | hypre_qsort0 | hypre_qsort.c:186 | exec |
○ | hypre_qsort0 | hypre_qsort.c:186 | exec |
○ | hypre_qsort0 | hypre_qsort.c:186 | exec |
○ | hypre_IJMatrixAssembleParCSR | IJMatrix_parcsr.c:2834 | exec |
○ | BuildIJLaplacian27pt | amg.c:2267 | exec |
○ | main | amg.c:274 | exec |
○ | __libc_start_main | libc-2.28.so | |
►5.56+ | hypre_qsort0 | hypre_qsort.c:186 | exec |
○ | hypre_qsort0 | hypre_qsort.c:186 | exec |
○ | hypre_qsort0 | hypre_qsort.c:186 | exec |
○ | hypre_qsort0 | hypre_qsort.c:186 | exec |
○ | hypre_qsort0 | hypre_qsort.c:186 | exec |
○ | hypre_qsort0 | hypre_qsort.c:186 | exec |
○ | hypre_qsort0 | hypre_qsort.c:186 | exec |
○ | hypre_qsort0 | hypre_qsort.c:186 | exec |
○ | hypre_qsort0 | hypre_qsort.c:186 | exec |
○ | hypre_qsort0 | hypre_qsort.c:186 | exec |
○ | hypre_qsort0 | hypre_qsort.c:186 | exec |
○ | hypre_qsort0 | hypre_qsort.c:186 | exec |
○ | hypre_IJMatrixAssembleParCSR | IJMatrix_parcsr.c:2834 | exec |
○ | BuildIJLaplacian27pt | amg.c:2267 | exec |
○ | main | amg.c:274 | exec |
○ | __libc_start_main | libc-2.28.so | |
►5.56+ | hypre_qsort0 | hypre_qsort.c:186 | exec |
○ | hypre_qsort0 | hypre_qsort.c:186 | exec |
○ | hypre_qsort0 | hypre_qsort.c:186 | exec |
○ | hypre_ParTMatmul | par_csr_matop.c:3359 | exec |
○ | hypre_BoomerAMGSetup | par_amg_setup.c:1227 | exec |
○ | hypre_PCGSetup | pcg.c:234 | exec |
○ | main | amg.c:398 | exec |
○ | __libc_start_main | libc-2.28.so | |
►5.56+ | hypre_qsort0 | hypre_qsort.c:186 | exec |
○ | hypre_qsort0 | hypre_qsort.c:186 | exec |
○ | hypre_qsort0 | hypre_qsort.c:186 | exec |
○ | hypre_qsort0 | hypre_qsort.c:186 | exec |
○ | hypre_qsort0 | hypre_qsort.c:186 | exec |
○ | hypre_qsort0 | hypre_qsort.c:186 | exec |
○ | hypre_qsort0 | hypre_qsort.c:186 | exec |
○ | hypre_qsort0 | hypre_qsort.c:186 | exec |
○ | hypre_qsort0 | hypre_qsort.c:186 | exec |
○ | hypre_qsort0 | hypre_qsort.c:186 | exec |
○ | hypre_IJMatrixAssembleParCSR | IJMatrix_parcsr.c:2834 | exec |
○ | BuildIJLaplacian27pt | amg.c:2267 | exec |
○ | main | amg.c:274 | exec |
○ | __libc_start_main | libc-2.28.so | |
►5.55+ | hypre_qsort0 | hypre_qsort.c:186 | exec |
○ | hypre_qsort0 | hypre_qsort.c:186 | exec |
○ | hypre_qsort0 | hypre_qsort.c:186 | exec |
○ | hypre_qsort0 | hypre_qsort.c:186 | exec |
○ | hypre_qsort0 | hypre_qsort.c:186 | exec |
○ | hypre_qsort0 | hypre_qsort.c:186 | exec |
○ | hypre_qsort0 | hypre_qsort.c:186 | exec |
○ | hypre_qsort0 | hypre_qsort.c:186 | exec |
○ | hypre_IJMatrixAssembleParCSR | IJMatrix_parcsr.c:2834 | exec |
○ | BuildIJLaplacian27pt | amg.c:2267 | exec |
○ | main | amg.c:274 | exec |
○ | __libc_start_main | libc-2.28.so | |
►5.55+ | hypre_qsort0 | hypre_qsort.c:186 | exec |
○ | hypre_qsort0 | hypre_qsort.c:186 | exec |
○ | hypre_ParTMatmul | par_csr_matop.c:3359 | exec |
○ | hypre_BoomerAMGSetup | par_amg_setup.c:1227 | exec |
○ | hypre_PCGSetup | pcg.c:234 | exec |
○ | main | amg.c:398 | exec |
○ | __libc_start_main | libc-2.28.so | |
►5.55+ | hypre_qsort0 | hypre_qsort.c:186 | exec |
○ | hypre_qsort0 | hypre_qsort.c:186 | exec |
○ | hypre_qsort0 | hypre_qsort.c:186 | exec |
○ | hypre_IJMatrixAssembleParCSR | IJMatrix_parcsr.c:2834 | exec |
○ | BuildIJLaplacian27pt | amg.c:2267 | exec |
○ | main | amg.c:274 | exec |
○ | __libc_start_main | libc-2.28.so | |
►5.55+ | hypre_qsort0 | hypre_qsort.c:186 | exec |
○ | hypre_qsort0 | hypre_qsort.c:186 | exec |
○ | hypre_qsort0 | hypre_qsort.c:186 | exec |
○ | hypre_qsort0 | hypre_qsort.c:186 | exec |
○ | hypre_IJMatrixAssembleParCSR | IJMatrix_parcsr.c:2834 | exec |
○ | BuildIJLaplacian27pt | amg.c:2267 | exec |
○ | main | amg.c:274 | exec |
○ | __libc_start_main | libc-2.28.so | |
►5.55+ | hypre_qsort0 | hypre_qsort.c:186 | exec |
○ | hypre_qsort0 | hypre_qsort.c:186 | exec |
○ | hypre_qsort0 | hypre_qsort.c:186 | exec |
○ | hypre_qsort0 | hypre_qsort.c:186 | exec |
○ | hypre_qsort0 | hypre_qsort.c:186 | exec |
○ | hypre_qsort0 | hypre_qsort.c:186 | exec |
○ | hypre_qsort0 | hypre_qsort.c:186 | exec |
○ | hypre_qsort0 | hypre_qsort.c:186 | exec |
○ | hypre_qsort0 | hypre_qsort.c:186 | exec |
○ | hypre_qsort0 | hypre_qsort.c:186 | exec |
○ | hypre_qsort0 | hypre_qsort.c:186 | exec |
○ | hypre_qsort0 | hypre_qsort.c:186 | exec |
○ | hypre_qsort0 | hypre_qsort.c:186 | exec |
○ | hypre_qsort0 | hypre_qsort.c:186 | exec |
○ | hypre_IJMatrixAssembleParCSR | IJMatrix_parcsr.c:2834 | exec |
○ | BuildIJLaplacian27pt | amg.c:2267 | exec |
○ | main | amg.c:274 | exec |
○ | __libc_start_main | libc-2.28.so | |
►5.55+ | hypre_qsort0 | hypre_qsort.c:186 | exec |
○ | hypre_qsort0 | hypre_qsort.c:186 | exec |
○ | hypre_qsort0 | hypre_qsort.c:186 | exec |
○ | hypre_qsort0 | hypre_qsort.c:186 | exec |
○ | hypre_qsort0 | hypre_qsort.c:186 | exec |
○ | hypre_qsort0 | hypre_qsort.c:186 | exec |
○ | hypre_qsort0 | hypre_qsort.c:186 | exec |
○ | hypre_qsort0 | hypre_qsort.c:186 | exec |
○ | hypre_qsort0 | hypre_qsort.c:186 | exec |
○ | hypre_qsort0 | hypre_qsort.c:186 | exec |
○ | hypre_qsort0 | hypre_qsort.c:186 | exec |
○ | hypre_qsort0 | hypre_qsort.c:186 | exec |
○ | hypre_qsort0 | hypre_qsort.c:186 | exec |
○ | hypre_IJMatrixAssembleParCSR | IJMatrix_parcsr.c:2834 | exec |
○ | BuildIJLaplacian27pt | amg.c:2267 | exec |
○ | main | amg.c:274 | exec |
○ | __libc_start_main | libc-2.28.so |
Path / |
Source file and lines | hypre_qsort.c:31-187 |
Module | exec |
nb instructions | 23 |
nb uops | 23 |
loop length | 75 |
used x86 registers | 10 |
used mmx registers | 0 |
used xmm registers | 0 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 0 |
micro-operation queue | 3.83 cycles |
front end | 3.83 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 1.00 | 0.40 | 2.00 | 2.00 | 2.50 | 0.40 | 1.00 | 2.50 | 2.50 | 2.50 | 0.20 | 2.00 |
cycles | 1.00 | 0.40 | 2.00 | 2.00 | 2.50 | 0.40 | 1.00 | 2.50 | 2.50 | 2.50 | 0.20 | 2.00 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 3.93-3.95 |
Stall cycles | 0.00 |
Front-end | 3.83 |
Dispatch | 2.50 |
Overall L1 | 3.83 |
Vectorization ratios |
all | 0% |
load | NA (no load vectorizable/vectorized instructions) |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
Vector efficiency ratios |
all | 12% |
load | NA (no load vectorizable/vectorized instructions) |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 12% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
CMP %RDX,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 4d5a5c <hypre_qsort0+0x17c> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RDX,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RDI,%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
LEA 0x20(%RDI),%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RSI,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
JMP 4d593c <hypre_qsort0+0x5c> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Source file and lines | hypre_qsort.c:31-187 |
Module | exec |
nb instructions | 23 |
nb uops | 23 |
loop length | 75 |
used x86 registers | 10 |
used mmx registers | 0 |
used xmm registers | 0 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 0 |
micro-operation queue | 3.83 cycles |
front end | 3.83 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 1.00 | 0.40 | 2.00 | 2.00 | 2.50 | 0.40 | 1.00 | 2.50 | 2.50 | 2.50 | 0.20 | 2.00 |
cycles | 1.00 | 0.40 | 2.00 | 2.00 | 2.50 | 0.40 | 1.00 | 2.50 | 2.50 | 2.50 | 0.20 | 2.00 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 3.93-3.95 |
Stall cycles | 0.00 |
Front-end | 3.83 |
Dispatch | 2.50 |
Overall L1 | 3.83 |
Vectorization ratios |
all | 0% |
load | NA (no load vectorizable/vectorized instructions) |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
Vector efficiency ratios |
all | 12% |
load | NA (no load vectorizable/vectorized instructions) |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 12% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
CMP %RDX,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 4d5a5c <hypre_qsort0+0x17c> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RDX,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RDI,%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
LEA 0x20(%RDI),%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RSI,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
JMP 4d593c <hypre_qsort0+0x5c> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼hypre_qsort0– | 0.01 | 0 |
▼Loop 4166 - hypre_qsort.c:31-186 - exec– | 0 | 0 |
○Loop 4168 - hypre_qsort.c:31-182 - exec | 0.01 | 0.04 |
○Loop 4167 - hypre_qsort.c:31-182 - exec | 0 | 0 |