Function: hypre_ParTMatmul.extracted | Module: exec | Source: par_csr_matop.c:3454-3640 [...] | Coverage: 0.09% |
---|
Function: hypre_ParTMatmul.extracted | Module: exec | Source: par_csr_matop.c:3454-3640 [...] | Coverage: 0.09% |
---|
/scratch_na/users/xoserete/qaas_runs/171-172-8217/intel/AMG/build/AMG/AMG/parcsr_mv/par_csr_matop.c: 3454 - 3640 |
-------------------------------------------------------------------------------- |
3454: #pragma omp parallel |
[...] |
3462: HYPRE_Int num_threads = hypre_NumActiveThreads(); |
3463: |
3464: size = num_cols_diag_A/num_threads; |
3465: rest = num_cols_diag_A - size*num_threads; |
3466: ii = hypre_GetThreadNum(); |
3467: if (ii < rest) |
3468: { |
3469: ns = ii*size+ii; |
3470: ne = (ii+1)*size+ii+1; |
3471: } |
3472: else |
3473: { |
3474: ns = ii*size+rest; |
3475: ne = (ii+1)*size+rest; |
3476: } |
3477: |
3478: B_marker = hypre_CTAlloc(HYPRE_Int, num_cols_diag_B); |
3479: B_marker_offd = hypre_CTAlloc(HYPRE_Int, num_cols_offd_C); |
3480: |
3481: for (ik = 0; ik < num_cols_diag_B; ik++) |
3482: B_marker[ik] = -1; |
3483: |
3484: for (ik = 0; ik < num_cols_offd_C; ik++) |
3485: B_marker_offd[ik] = -1; |
3486: |
3487: nnz_d = 0; |
3488: nnz_o = 0; |
3489: for (ik = ns; ik < ne; ik++) |
3490: { |
3491: for (jk = C_tmp_diag_i[ik]; jk < C_tmp_diag_i[ik+1]; jk++) |
3492: { |
3493: jcol = C_tmp_diag_j[jk]; |
3494: B_marker[jcol] = ik; |
3495: nnz_d++; |
3496: } |
3497: for (jk = C_tmp_offd_i[ik]; jk < C_tmp_offd_i[ik+1]; jk++) |
3498: { |
3499: jcol = C_tmp_offd_j[jk]; |
3500: B_marker_offd[jcol] = ik; |
3501: nnz_o++; |
3502: } |
3503: for (jk = 0; jk < num_sends_A; jk++) |
3504: for (j1 = send_map_starts_A[jk]; j1 < send_map_starts_A[jk+1]; j1++) |
3505: if (send_map_elmts_A[j1] == ik) |
3506: { |
3507: for (j2 = C_ext_diag_i[j1]; j2 < C_ext_diag_i[j1+1]; j2++) |
3508: { |
3509: jcol = C_ext_diag_j[j2]; |
3510: if (B_marker[jcol] < ik) |
3511: { |
3512: B_marker[jcol] = ik; |
3513: nnz_d++; |
3514: } |
3515: } |
3516: for (j2 = C_ext_offd_i[j1]; j2 < C_ext_offd_i[j1+1]; j2++) |
3517: { |
3518: jcol = C_ext_offd_j[j2]; |
3519: if (B_marker_offd[jcol] < ik) |
3520: { |
3521: B_marker_offd[jcol] = ik; |
3522: nnz_o++; |
3523: } |
3524: } |
3525: break; |
3526: } |
3527: C_diag_array[ii] = nnz_d; |
3528: C_offd_array[ii] = nnz_o; |
3529: } |
3530: #ifdef HYPRE_USING_OPENMP |
3531: #pragma omp barrier |
3532: #endif |
3533: if (ii == 0) |
3534: { |
3535: nnz_d = 0; |
3536: nnz_o = 0; |
3537: for (ik = 0; ik < num_threads-1; ik++) |
3538: { |
3539: C_diag_array[ik+1] += C_diag_array[ik]; |
3540: C_offd_array[ik+1] += C_offd_array[ik]; |
3541: } |
3542: nnz_d = C_diag_array[num_threads-1]; |
3543: nnz_o = C_offd_array[num_threads-1]; |
3544: C_diag_i[num_cols_diag_A] = nnz_d; |
3545: C_offd_i[num_cols_diag_A] = nnz_o; |
3546: |
3547: C_diag = hypre_CSRMatrixCreate(num_cols_diag_A, num_cols_diag_A, nnz_d); |
3548: C_offd = hypre_CSRMatrixCreate(num_cols_diag_A, num_cols_offd_C, nnz_o); |
3549: hypre_CSRMatrixI(C_diag) = C_diag_i; |
3550: hypre_CSRMatrixInitialize(C_diag); |
3551: C_diag_j = hypre_CSRMatrixJ(C_diag); |
3552: C_diag_data = hypre_CSRMatrixData(C_diag); |
3553: hypre_CSRMatrixI(C_offd) = C_offd_i; |
3554: hypre_CSRMatrixInitialize(C_offd); |
3555: C_offd_j = hypre_CSRMatrixJ(C_offd); |
3556: C_offd_data = hypre_CSRMatrixData(C_offd); |
[...] |
3568: for (ik = 0; ik < num_cols_diag_B; ik++) |
3569: B_marker[ik] = -1; |
3570: |
3571: for (ik = 0; ik < num_cols_offd_C; ik++) |
3572: B_marker_offd[ik] = -1; |
[...] |
3581: if (ii) |
3582: { |
3583: nnz_d = C_diag_array[ii-1]; |
3584: nnz_o = C_offd_array[ii-1]; |
3585: } |
3586: for (ik = ns; ik < ne; ik++) |
3587: { |
3588: C_diag_i[ik] = nnz_d; |
3589: C_offd_i[ik] = nnz_o; |
3590: for (jk = C_tmp_diag_i[ik]; jk < C_tmp_diag_i[ik+1]; jk++) |
3591: { |
3592: jcol = C_tmp_diag_j[jk]; |
3593: C_diag_j[nnz_d] = jcol; |
3594: C_diag_data[nnz_d] = C_tmp_diag_data[jk]; |
3595: B_marker[jcol] = nnz_d; |
3596: nnz_d++; |
3597: } |
3598: for (jk = C_tmp_offd_i[ik]; jk < C_tmp_offd_i[ik+1]; jk++) |
3599: { |
3600: jcol = C_tmp_offd_j[jk]; |
3601: C_offd_j[nnz_o] = jcol; |
3602: C_offd_data[nnz_o] = C_tmp_offd_data[jk]; |
3603: B_marker_offd[jcol] = nnz_o; |
3604: nnz_o++; |
3605: } |
3606: for (jk = 0; jk < num_sends_A; jk++) |
3607: for (j1 = send_map_starts_A[jk]; j1 < send_map_starts_A[jk+1]; j1++) |
3608: if (send_map_elmts_A[j1] == ik) |
3609: { |
3610: for (j2 = C_ext_diag_i[j1]; j2 < C_ext_diag_i[j1+1]; j2++) |
3611: { |
3612: jcol = C_ext_diag_j[j2]; |
3613: if (B_marker[jcol] < C_diag_i[ik]) |
3614: { |
3615: C_diag_j[nnz_d] = jcol; |
3616: C_diag_data[nnz_d] = C_ext_diag_data[j2]; |
3617: B_marker[jcol] = nnz_d; |
3618: nnz_d++; |
3619: } |
3620: else |
3621: C_diag_data[B_marker[jcol]] += C_ext_diag_data[j2]; |
3622: } |
3623: for (j2 = C_ext_offd_i[j1]; j2 < C_ext_offd_i[j1+1]; j2++) |
3624: { |
3625: jcol = C_ext_offd_j[j2]; |
3626: if (B_marker_offd[jcol] < C_offd_i[ik]) |
3627: { |
3628: C_offd_j[nnz_o] = jcol; |
3629: C_offd_data[nnz_o] = C_ext_offd_data[j2]; |
3630: B_marker_offd[jcol] = nnz_o; |
3631: nnz_o++; |
3632: } |
3633: else |
3634: C_offd_data[B_marker_offd[jcol]] += C_ext_offd_data[j2]; |
3635: } |
3636: break; |
3637: } |
3638: } |
3639: hypre_TFree(B_marker); |
3640: hypre_TFree(B_marker_offd); |
0x4b2240 PUSH %RBP |
0x4b2241 MOV %RSP,%RBP |
0x4b2244 PUSH %R15 |
0x4b2246 PUSH %R14 |
0x4b2248 PUSH %R13 |
0x4b224a PUSH %R12 |
0x4b224c PUSH %RBX |
0x4b224d SUB $0x118,%RSP |
0x4b2254 MOV %R9,-0xa8(%RBP) |
0x4b225b MOV %R8,-0x100(%RBP) |
0x4b2262 MOV %RCX,%R13 |
0x4b2265 MOV %RDX,%R14 |
0x4b2268 MOV %RDI,-0x50(%RBP) |
0x4b226c MOV 0xc8(%RBP),%RAX |
0x4b2273 MOV %RAX,-0xd0(%RBP) |
0x4b227a MOV 0xc0(%RBP),%RAX |
0x4b2281 MOV %RAX,-0xc8(%RBP) |
0x4b2288 MOV 0xb8(%RBP),%RAX |
0x4b228f MOV %RAX,-0x98(%RBP) |
0x4b2296 MOV 0xb0(%RBP),%RBX |
0x4b229d MOV 0xa8(%RBP),%RAX |
0x4b22a4 MOV %RAX,-0x80(%RBP) |
0x4b22a8 MOV 0xa0(%RBP),%R12 |
0x4b22af MOV 0x98(%RBP),%RAX |
0x4b22b6 MOV %RAX,-0xf8(%RBP) |
0x4b22bd MOV 0x90(%RBP),%RAX |
0x4b22c4 MOV %RAX,-0x38(%RBP) |
0x4b22c8 MOV 0x88(%RBP),%RAX |
0x4b22cf MOV %RAX,-0xe0(%RBP) |
0x4b22d6 MOV 0x80(%RBP),%RAX |
0x4b22dd MOV %RAX,-0x140(%RBP) |
0x4b22e4 MOV 0x78(%RBP),%RAX |
0x4b22e8 MOV %RAX,-0x68(%RBP) |
0x4b22ec MOV 0x70(%RBP),%RAX |
0x4b22f0 MOV %RAX,-0xa0(%RBP) |
0x4b22f7 MOV 0x68(%RBP),%RAX |
0x4b22fb MOV %RAX,-0x138(%RBP) |
0x4b2302 MOV 0x60(%RBP),%RAX |
0x4b2306 MOV %RAX,-0x70(%RBP) |
0x4b230a MOV 0x58(%RBP),%RAX |
0x4b230e MOV %RAX,-0x60(%RBP) |
0x4b2312 MOV 0x50(%RBP),%RAX |
0x4b2316 MOV %RAX,-0xe8(%RBP) |
0x4b231d MOV 0x48(%RBP),%RAX |
0x4b2321 MOV %RAX,-0x110(%RBP) |
0x4b2328 MOV 0x40(%RBP),%RAX |
0x4b232c MOV %RAX,-0xc0(%RBP) |
0x4b2333 MOV 0x38(%RBP),%RAX |
0x4b2337 MOV %RAX,-0x108(%RBP) |
0x4b233e MOV 0x30(%RBP),%RAX |
0x4b2342 MOV %RAX,-0x88(%RBP) |
0x4b2349 MOV 0x28(%RBP),%RAX |
0x4b234d MOV %RAX,-0xb8(%RBP) |
0x4b2354 MOV 0x20(%RBP),%RAX |
0x4b2358 MOV %RAX,-0x128(%RBP) |
0x4b235f MOV 0x18(%RBP),%RAX |
0x4b2363 MOV %RAX,-0xf0(%RBP) |
0x4b236a MOV 0x10(%RBP),%RAX |
0x4b236e MOV %RAX,-0xb0(%RBP) |
0x4b2375 CALL 4d7300 <hypre_NumActiveThreads> |
0x4b237a MOV %RAX,%RCX |
0x4b237d MOV %R14,%RAX |
0x4b2380 OR %RCX,%RAX |
0x4b2383 SHR $0x20,%RAX |
0x4b2387 MOV %R14,-0x120(%RBP) |
0x4b238e MOV %RCX,-0x118(%RBP) |
0x4b2395 JE 4b23a7 |
0x4b2397 MOV %R14,%RAX |
0x4b239a CQTO |
0x4b239c IDIV %RCX |
0x4b239f MOV %RDX,%R15 |
0x4b23a2 MOV %RAX,%R14 |
0x4b23a5 JMP 4b23b4 |
0x4b23a7 MOV %R14D,%EAX |
0x4b23aa XOR %EDX,%EDX |
0x4b23ac DIV %ECX |
0x4b23ae MOV %EDX,%R15D |
0x4b23b1 MOV %EAX,%R14D |
0x4b23b4 CALL 4d7310 <hypre_GetThreadNum> |
0x4b23b9 LEA 0x1(%RAX),%RCX |
0x4b23bd CMP %R15,%RAX |
0x4b23c0 MOV %RAX,-0xd8(%RBP) |
0x4b23c7 MOV %RBX,-0x58(%RBP) |
0x4b23cb MOV %R12,-0x40(%RBP) |
0x4b23cf JGE 4b23e2 |
0x4b23d1 INC %R14 |
0x4b23d4 IMUL %R14,%RAX |
0x4b23d8 MOV %RAX,-0x30(%RBP) |
0x4b23dc IMUL %R14,%RCX |
0x4b23e0 JMP 4b23f4 |
0x4b23e2 IMUL %R14,%RAX |
0x4b23e6 ADD %R15,%RAX |
0x4b23e9 MOV %RAX,-0x30(%RBP) |
0x4b23ed IMUL %R14,%RCX |
0x4b23f1 ADD %R15,%RCX |
0x4b23f4 MOV %RCX,-0x90(%RBP) |
0x4b23fb MOV $0x8,%ESI |
0x4b2400 MOV %R13,%RDI |
0x4b2403 CALL 4d5650 <hypre_CAlloc> |
0x4b2408 MOV %RAX,%R14 |
0x4b240b MOV -0x98(%RBP),%RAX |
0x4b2412 MOV (%RAX),%RDI |
0x4b2415 MOV $0x8,%ESI |
0x4b241a CALL 4d5650 <hypre_CAlloc> |
0x4b241f MOV %RAX,%R15 |
0x4b2422 TEST %R13,%R13 |
0x4b2425 MOV -0x38(%RBP),%RBX |
0x4b2429 JLE 4b2440 |
0x4b242b LEA (,%R13,8),%RDX |
0x4b2433 MOV %R14,%RDI |
0x4b2436 MOV $0xff,%ESI |
0x4b243b CALL 4e0430 <__intel_avx_rep_memset> |
0x4b2440 MOV %R13,-0x130(%RBP) |
0x4b2447 MOV %R14,-0x48(%RBP) |
0x4b244b MOV -0x98(%RBP),%RDX |
0x4b2452 CMPQ $0,(%RDX) |
0x4b2456 MOV -0x90(%RBP),%RCX |
0x4b245d MOV -0x30(%RBP),%RSI |
0x4b2461 JLE 4b2480 |
0x4b2463 XOR %EAX,%EAX |
0x4b2465 NOPW %CS:(%RAX,%RAX,1) |
(3473) 0x4b2470 MOVQ $-0x1,(%R15,%RAX,8) |
(3473) 0x4b2478 INC %RAX |
(3473) 0x4b247b CMP (%RDX),%RAX |
(3473) 0x4b247e JL 4b2470 |
0x4b2480 CMP %RCX,%RSI |
0x4b2483 MOV -0x58(%RBP),%R14 |
0x4b2487 MOV -0x40(%RBP),%R13 |
0x4b248b MOV -0x70(%RBP),%R9 |
0x4b248f MOV -0x60(%RBP),%R12 |
0x4b2493 MOV -0x48(%RBP),%R11 |
0x4b2497 MOV -0x50(%RBP),%RSI |
0x4b249b JGE 4b27fd |
0x4b24a1 XOR %EAX,%EAX |
0x4b24a3 MOV -0x30(%RBP),%RDX |
0x4b24a7 XOR %ECX,%ECX |
0x4b24a9 JMP 4b24e1 |
0x4b24ab NOPL (%RAX,%RAX,1) |
(3464) 0x4b24b0 MOV -0x50(%RBP),%RSI |
(3464) 0x4b24b4 MOV -0xc8(%RBP),%RDX |
(3464) 0x4b24bb MOV -0xd8(%RBP),%RDI |
(3464) 0x4b24c2 MOV %RCX,(%RDX,%RDI,8) |
(3464) 0x4b24c6 MOV -0xd0(%RBP),%RDX |
(3464) 0x4b24cd MOV %RAX,(%RDX,%RDI,8) |
(3464) 0x4b24d1 MOV %R10,%RDX |
(3464) 0x4b24d4 CMP -0x90(%RBP),%R10 |
(3464) 0x4b24db JGE 4b27fd |
(3464) 0x4b24e1 MOV (%R12,%RDX,8),%RSI |
(3464) 0x4b24e5 LEA 0x1(%RDX),%R10 |
(3464) 0x4b24e9 CMP 0x8(%R12,%RDX,8),%RSI |
(3464) 0x4b24ee JGE 4b2520 |
(3464) 0x4b24f0 MOV -0x68(%RBP),%R8 |
(3464) 0x4b24f4 NOPW %CS:(%RAX,%RAX,1) |
(3472) 0x4b2500 MOV (%R9,%RSI,8),%RDI |
(3472) 0x4b2504 MOV %RDX,(%R11,%RDI,8) |
(3472) 0x4b2508 INC %RCX |
(3472) 0x4b250b INC %RSI |
(3472) 0x4b250e CMP (%R12,%R10,8),%RSI |
(3472) 0x4b2512 JL 4b2500 |
(3464) 0x4b2514 JMP 4b2524 |
0x4b2516 NOPW %CS:(%RAX,%RAX,1) |
(3464) 0x4b2520 MOV -0x68(%RBP),%R8 |
(3464) 0x4b2524 MOV -0xa0(%RBP),%RDI |
(3464) 0x4b252b MOV (%RDI,%RDX,8),%RSI |
(3464) 0x4b252f CMP (%RDI,%R10,8),%RSI |
(3464) 0x4b2533 JGE 4b255b |
(3464) 0x4b2535 NOPW %CS:(%RAX,%RAX,1) |
(3471) 0x4b2540 MOV (%R8,%RSI,8),%RDI |
(3471) 0x4b2544 MOV %RDX,(%R15,%RDI,8) |
(3471) 0x4b2548 INC %RAX |
(3471) 0x4b254b INC %RSI |
(3471) 0x4b254e MOV -0xa0(%RBP),%RDI |
(3471) 0x4b2555 CMP (%RDI,%R10,8),%RSI |
(3471) 0x4b2559 JL 4b2540 |
(3464) 0x4b255b TEST %R14,%R14 |
(3464) 0x4b255e JLE 4b24b0 |
(3464) 0x4b2564 VPBROADCASTQ %RDX,%YMM0 |
(3464) 0x4b256a XOR %EDI,%EDI |
(3464) 0x4b256c MOV -0x50(%RBP),%RSI |
(3464) 0x4b2570 MOV %R10,-0x78(%RBP) |
(3464) 0x4b2574 NOPW %CS:(%RAX,%RAX,1) |
(3465) 0x4b2580 MOV (%R13,%RDI,8),%R10 |
(3465) 0x4b2585 MOV 0x8(%R13,%RDI,8),%R9 |
(3465) 0x4b258a MOV %R9,%R11 |
(3465) 0x4b258d SUB %R10,%R11 |
(3465) 0x4b2590 JLE 4b26b0 |
(3465) 0x4b2596 MOV %R14,%R12 |
(3465) 0x4b2599 MOV -0x80(%RBP),%RSI |
(3465) 0x4b259d LEA (%RSI,%R10,8),%ESI |
(3465) 0x4b25a1 AND $0x7f,%ESI |
(3465) 0x4b25a4 MOV $0x80,%EBX |
(3465) 0x4b25a9 SUB %ESI,%EBX |
(3465) 0x4b25ab SHR $0x3,%EBX |
(3465) 0x4b25ae CMP %RBX,%R11 |
(3465) 0x4b25b1 MOV %RBX,%RSI |
(3465) 0x4b25b4 CMOVB %R11,%RSI |
(3465) 0x4b25b8 TEST %RSI,%RSI |
(3465) 0x4b25bb JE 4b25e7 |
(3465) 0x4b25bd MOV %R10,%R8 |
(3465) 0x4b25c0 MOV %RSI,%R14 |
(3465) 0x4b25c3 NOPW %CS:(%RAX,%RAX,1) |
(3470) 0x4b25d0 MOV -0x80(%RBP),%R13 |
(3470) 0x4b25d4 CMP %RDX,(%R13,%R8,8) |
(3470) 0x4b25d9 JE 4b26d9 |
(3470) 0x4b25df INC %R8 |
(3470) 0x4b25e2 DEC %R14 |
(3470) 0x4b25e5 JNE 4b25d0 |
(3465) 0x4b25e7 CMP %RBX,%R11 |
(3465) 0x4b25ea JBE 4b26bd |
(3465) 0x4b25f0 SUB %RSI,%R11 |
(3465) 0x4b25f3 MOV %R11,%RBX |
(3465) 0x4b25f6 AND $-0x10,%RBX |
(3465) 0x4b25fa JE 4b264a |
(3465) 0x4b25fc LEA -0x1(%RBX),%R13 |
(3465) 0x4b2600 LEA (%R10,%RSI,1),%R8 |
(3465) 0x4b2604 MOV -0x80(%RBP),%R14 |
(3465) 0x4b2608 LEA (%R14,%R8,8),%R14 |
(3465) 0x4b260c XOR %R12D,%R12D |
(3465) 0x4b260f NOP |
(3469) 0x4b2610 VPCMPEQQ 0x20(%R14,%R12,8),%YMM0,%K0 |
(3469) 0x4b2618 VPCMPEQQ (%R14,%R12,8),%YMM0,%K1 |
(3469) 0x4b261f VPCMPEQQ 0x60(%R14,%R12,8),%YMM0,%K2 |
(3469) 0x4b2627 VPCMPEQQ 0x40(%R14,%R12,8),%YMM0,%K3 |
(3469) 0x4b262f KORB %K0,%K1,%K4 |
(3469) 0x4b2633 KORB %K2,%K3,%K5 |
(3469) 0x4b2637 KORTESTB %K5,%K4 |
(3469) 0x4b263b JNE 4b270b |
(3469) 0x4b2641 ADD $0x10,%R12 |
(3469) 0x4b2645 CMP %R13,%R12 |
(3469) 0x4b2648 JBE 4b2610 |
(3465) 0x4b264a CMP %R11,%RBX |
(3465) 0x4b264d JAE 4b26ee |
(3465) 0x4b2653 ADD %RSI,%R10 |
(3465) 0x4b2656 ADD %RBX,%R10 |
(3465) 0x4b2659 MOV %R10,%R8 |
(3465) 0x4b265c MOV -0x58(%RBP),%R14 |
(3465) 0x4b2660 MOV -0x40(%RBP),%R13 |
(3465) 0x4b2664 MOV -0x38(%RBP),%RBX |
(3465) 0x4b2668 MOV -0x60(%RBP),%R12 |
(3465) 0x4b266c MOV -0x48(%RBP),%R11 |
(3465) 0x4b2670 MOV -0x78(%RBP),%R10 |
(3465) 0x4b2674 NOPW %CS:(%RAX,%RAX,1) |
(3468) 0x4b2680 MOV -0x80(%RBP),%RSI |
(3468) 0x4b2684 CMP %RDX,(%RSI,%R8,8) |
(3468) 0x4b2688 JE 4b2745 |
(3468) 0x4b268e INC %R8 |
(3468) 0x4b2691 CMP %R8,%R9 |
(3468) 0x4b2694 JNE 4b2680 |
(3465) 0x4b2696 MOV -0x70(%RBP),%R9 |
(3465) 0x4b269a MOV -0x50(%RBP),%RSI |
(3465) 0x4b269e JMP 4b27ec |
0x4b26a3 NOPW %CS:(%RAX,%RAX,1) |
(3465) 0x4b26b0 MOV -0x70(%RBP),%R9 |
(3465) 0x4b26b4 MOV -0x48(%RBP),%R11 |
(3465) 0x4b26b8 JMP 4b27e8 |
(3465) 0x4b26bd MOV %R12,%R14 |
(3465) 0x4b26c0 MOV -0x40(%RBP),%R13 |
(3465) 0x4b26c4 MOV -0x38(%RBP),%RBX |
(3465) 0x4b26c8 MOV -0x70(%RBP),%R9 |
(3465) 0x4b26cc MOV -0x48(%RBP),%R11 |
(3465) 0x4b26d0 MOV -0x60(%RBP),%R12 |
(3465) 0x4b26d4 JMP 4b27e4 |
(3465) 0x4b26d9 MOV -0x40(%RBP),%R13 |
(3465) 0x4b26dd MOV -0x38(%RBP),%RBX |
(3465) 0x4b26e1 MOV -0x48(%RBP),%R11 |
(3465) 0x4b26e5 MOV %R12,%R14 |
(3465) 0x4b26e8 MOV -0x60(%RBP),%R12 |
(3465) 0x4b26ec JMP 4b2745 |
(3465) 0x4b26ee MOV -0x58(%RBP),%R14 |
(3465) 0x4b26f2 MOV -0x40(%RBP),%R13 |
(3465) 0x4b26f6 MOV -0x38(%RBP),%RBX |
(3465) 0x4b26fa MOV -0x70(%RBP),%R9 |
(3465) 0x4b26fe MOV -0x60(%RBP),%R12 |
(3465) 0x4b2702 MOV -0x48(%RBP),%R11 |
(3465) 0x4b2706 JMP 4b27e4 |
(3465) 0x4b270b KSHIFTLB $0x4,%K0,%K0 |
(3465) 0x4b2711 KORB %K0,%K1,%K0 |
(3465) 0x4b2715 KSHIFTLB $0x4,%K2,%K1 |
(3465) 0x4b271b KORB %K1,%K3,%K1 |
(3465) 0x4b271f KUNPCKBW %K0,%K1,%K0 |
(3465) 0x4b2723 KMOVD %K0,%ESI |
(3465) 0x4b2727 TZCNT %ESI,%ESI |
(3465) 0x4b272b ADD %R12,%R8 |
(3465) 0x4b272e ADD %RSI,%R8 |
(3465) 0x4b2731 MOV -0x58(%RBP),%R14 |
(3465) 0x4b2735 MOV -0x40(%RBP),%R13 |
(3465) 0x4b2739 MOV -0x38(%RBP),%RBX |
(3465) 0x4b273d MOV -0x60(%RBP),%R12 |
(3465) 0x4b2741 MOV -0x48(%RBP),%R11 |
(3465) 0x4b2745 MOV -0xb8(%RBP),%R9 |
(3465) 0x4b274c MOV (%R9,%R8,8),%RSI |
(3465) 0x4b2750 MOV 0x8(%R9,%R8,8),%R9 |
(3465) 0x4b2755 JMP 4b2763 |
0x4b2757 NOPW (%RAX,%RAX,1) |
(3467) 0x4b2760 INC %RSI |
(3467) 0x4b2763 CMP %R9,%RSI |
(3467) 0x4b2766 JGE 4b2790 |
(3467) 0x4b2768 MOV -0x88(%RBP),%R10 |
(3467) 0x4b276f MOV (%R10,%RSI,8),%R10 |
(3467) 0x4b2773 CMP %RDX,(%R11,%R10,8) |
(3467) 0x4b2777 JGE 4b2760 |
(3467) 0x4b2779 MOV %RDX,(%R11,%R10,8) |
(3467) 0x4b277d INC %RCX |
(3467) 0x4b2780 MOV -0xb8(%RBP),%R9 |
(3467) 0x4b2787 MOV 0x8(%R9,%R8,8),%R9 |
(3467) 0x4b278c JMP 4b2760 |
0x4b278e XCHG %AX,%AX |
(3465) 0x4b2790 MOV -0xc0(%RBP),%R9 |
(3465) 0x4b2797 MOV (%R9,%R8,8),%RSI |
(3465) 0x4b279b MOV 0x8(%R9,%R8,8),%R9 |
(3465) 0x4b27a0 JMP 4b27b3 |
0x4b27a2 NOPW %CS:(%RAX,%RAX,1) |
(3466) 0x4b27b0 INC %RSI |
(3466) 0x4b27b3 CMP %R9,%RSI |
(3466) 0x4b27b6 JGE 4b27e0 |
(3466) 0x4b27b8 MOV -0x110(%RBP),%R10 |
(3466) 0x4b27bf MOV (%R10,%RSI,8),%R10 |
(3466) 0x4b27c3 CMP %RDX,(%R15,%R10,8) |
(3466) 0x4b27c7 JGE 4b27b0 |
(3466) 0x4b27c9 MOV %RDX,(%R15,%R10,8) |
(3466) 0x4b27cd INC %RAX |
(3466) 0x4b27d0 MOV -0xc0(%RBP),%R9 |
(3466) 0x4b27d7 MOV 0x8(%R9,%R8,8),%R9 |
(3466) 0x4b27dc JMP 4b27b0 |
0x4b27de XCHG %AX,%AX |
(3465) 0x4b27e0 MOV -0x70(%RBP),%R9 |
(3465) 0x4b27e4 MOV -0x50(%RBP),%RSI |
(3465) 0x4b27e8 MOV -0x78(%RBP),%R10 |
(3465) 0x4b27ec INC %RDI |
(3465) 0x4b27ef CMP %R14,%RDI |
(3465) 0x4b27f2 JNE 4b2580 |
(3464) 0x4b27f8 JMP 4b24b4 |
0x4b27fd MOV (%RSI),%ESI |
0x4b27ff MOV $0x7192b0,%EDI |
0x4b2804 VZEROUPPER |
0x4b2807 CALL 40fed0 <__kmpc_barrier@plt> |
0x4b280c MOV -0x50(%RBP),%RAX |
0x4b2810 CMPQ $0,-0xd8(%RBP) |
0x4b2818 JNE 4b2a56 |
0x4b281e MOV %RBX,%R10 |
0x4b2821 MOV -0x118(%RBP),%RDI |
0x4b2828 CMP $0x1,%RDI |
0x4b282c MOV -0xd0(%RBP),%R11 |
0x4b2833 MOV -0xc8(%RBP),%RBX |
0x4b283a JLE 4b298c |
0x4b2840 LEA -0x1(%RDI),%RAX |
0x4b2844 LEA (%RBX,%RAX,8),%RCX |
0x4b2848 CMP %R11,%RCX |
0x4b284b JB 4b28b1 |
0x4b284d LEA (%R11,%RAX,8),%RCX |
0x4b2851 CMP %RBX,%RCX |
0x4b2854 JB 4b28b1 |
0x4b2856 LEA (%R11,%RDI,8),%RAX |
0x4b285a CMP %RBX,%RAX |
0x4b285d JBE 4b295e |
0x4b2863 LEA (%RBX,%RDI,8),%RAX |
0x4b2867 CMP %R11,%RAX |
0x4b286a JBE 4b295e |
0x4b2870 MOV $0x1,%EAX |
0x4b2875 NOPW %CS:(%RAX,%RAX,1) |
(3463) 0x4b2880 MOV -0x8(%RBX,%RAX,8),%RCX |
(3463) 0x4b2885 LEA (%RBX,%RAX,8),%RDX |
(3463) 0x4b2889 MOV (%RDX),%RSI |
(3463) 0x4b288c ADD %RCX,%RSI |
(3463) 0x4b288f MOV %RSI,(%RDX) |
(3463) 0x4b2892 MOV -0x8(%R11,%RAX,8),%RCX |
(3463) 0x4b2897 LEA (%R11,%RAX,8),%RDX |
(3463) 0x4b289b MOV (%RDX),%RSI |
(3463) 0x4b289e ADD %RCX,%RSI |
(3463) 0x4b28a1 MOV %RSI,(%RDX) |
(3463) 0x4b28a4 INC %RAX |
(3463) 0x4b28a7 CMP %RAX,%RDI |
(3463) 0x4b28aa JNE 4b2880 |
0x4b28ac JMP 4b298c |
0x4b28b1 MOV %RDI,%R12 |
0x4b28b4 CMP $0x5,%RDI |
0x4b28b8 JB 4b291d |
0x4b28ba MOV %RAX,%RCX |
0x4b28bd SHR $0x2,%RCX |
0x4b28c1 MOV (%RBX),%RDX |
0x4b28c4 MOV (%R11),%RSI |
0x4b28c7 MOV $0x20,%EDI |
0x4b28cc NOPL (%RAX) |
(3461) 0x4b28d0 LEA (%RBX,%RDI,1),%R8 |
(3461) 0x4b28d4 ADD -0x18(%R8),%RDX |
(3461) 0x4b28d8 MOV %RDX,-0x18(%R8) |
(3461) 0x4b28dc LEA (%R11,%RDI,1),%R9 |
(3461) 0x4b28e0 ADD -0x18(%R9),%RSI |
(3461) 0x4b28e4 MOV %RSI,-0x18(%R9) |
(3461) 0x4b28e8 ADD -0x10(%R8),%RDX |
(3461) 0x4b28ec MOV %RDX,-0x10(%R8) |
(3461) 0x4b28f0 ADD -0x10(%R9),%RSI |
(3461) 0x4b28f4 MOV %RSI,-0x10(%R9) |
(3461) 0x4b28f8 ADD -0x8(%R8),%RDX |
(3461) 0x4b28fc MOV %RDX,-0x8(%R8) |
(3461) 0x4b2900 ADD -0x8(%R9),%RSI |
(3461) 0x4b2904 MOV %RSI,-0x8(%R9) |
(3461) 0x4b2908 ADD (%R8),%RDX |
(3461) 0x4b290b MOV %RDX,(%R8) |
(3461) 0x4b290e ADD (%R9),%RSI |
(3461) 0x4b2911 MOV %RSI,(%R9) |
(3461) 0x4b2914 ADD $0x20,%RDI |
(3461) 0x4b2918 DEC %RCX |
(3461) 0x4b291b JNE 4b28d0 |
0x4b291d MOV %RAX,%RCX |
0x4b2920 AND $-0x4,%RCX |
0x4b2924 CMP %RAX,%RCX |
0x4b2927 MOV %R12,%RDI |
0x4b292a JAE 4b298c |
0x4b292c MOV (%RBX,%RCX,8),%RAX |
0x4b2930 MOV (%R11,%RCX,8),%RDX |
0x4b2934 INC %RCX |
0x4b2937 NOPW (%RAX,%RAX,1) |
(3460) 0x4b2940 LEA (%RBX,%RCX,8),%RSI |
(3460) 0x4b2944 ADD (%RSI),%RAX |
(3460) 0x4b2947 MOV %RAX,(%RSI) |
(3460) 0x4b294a LEA (%R11,%RCX,8),%RSI |
(3460) 0x4b294e ADD (%RSI),%RDX |
(3460) 0x4b2951 MOV %RDX,(%RSI) |
(3460) 0x4b2954 INC %RCX |
(3460) 0x4b2957 CMP %RCX,%RDI |
(3460) 0x4b295a JNE 4b2940 |
0x4b295c JMP 4b298c |
0x4b295e MOV (%RBX),%RAX |
0x4b2961 MOV (%R11),%RCX |
0x4b2964 MOV $0x1,%EDX |
0x4b2969 NOPL (%RAX) |
(3462) 0x4b2970 LEA (%RBX,%RDX,8),%RSI |
(3462) 0x4b2974 ADD (%RSI),%RAX |
(3462) 0x4b2977 MOV %RAX,(%RSI) |
(3462) 0x4b297a LEA (%R11,%RDX,8),%RSI |
(3462) 0x4b297e ADD (%RSI),%RCX |
(3462) 0x4b2981 MOV %RCX,(%RSI) |
(3462) 0x4b2984 INC %RDX |
(3462) 0x4b2987 CMP %RDX,%RDI |
(3462) 0x4b298a JNE 4b2970 |
0x4b298c MOV -0x8(%RBX,%RDI,8),%RDX |
0x4b2991 MOV -0x8(%R11,%RDI,8),%RCX |
0x4b2996 MOV %RCX,-0x78(%RBP) |
0x4b299a MOV -0x120(%RBP),%R12 |
0x4b29a1 MOV -0xb0(%RBP),%RAX |
0x4b29a8 MOV %RDX,(%RAX,%R12,8) |
0x4b29ac MOV %RCX,(%R10,%R12,8) |
0x4b29b0 MOV %R12,%RDI |
0x4b29b3 MOV %R12,%RSI |
0x4b29b6 CALL 4ca980 <hypre_CSRMatrixCreate> |
0x4b29bb MOV -0x100(%RBP),%RBX |
0x4b29c2 MOV %RAX,(%RBX) |
0x4b29c5 MOV -0x98(%RBP),%RAX |
0x4b29cc MOV (%RAX),%RSI |
0x4b29cf MOV %R12,%RDI |
0x4b29d2 MOV -0x78(%RBP),%RDX |
0x4b29d6 CALL 4ca980 <hypre_CSRMatrixCreate> |
0x4b29db MOV -0x128(%RBP),%R12 |
0x4b29e2 MOV %RAX,(%R12) |
0x4b29e6 MOV (%RBX),%RDI |
0x4b29e9 MOV -0xb0(%RBP),%RAX |
0x4b29f0 MOV %RAX,(%RDI) |
0x4b29f3 MOV -0x38(%RBP),%RBX |
0x4b29f7 CALL 4caa60 <hypre_CSRMatrixInitialize> |
0x4b29fc MOV -0x100(%RBP),%RAX |
0x4b2a03 MOV (%RAX),%RAX |
0x4b2a06 MOV 0x8(%RAX),%RCX |
0x4b2a0a MOV -0xf0(%RBP),%RDX |
0x4b2a11 MOV %RCX,(%RDX) |
0x4b2a14 MOV 0x30(%RAX),%RAX |
0x4b2a18 MOV -0xa8(%RBP),%RCX |
0x4b2a1f MOV %RAX,(%RCX) |
0x4b2a22 MOV (%R12),%RDI |
0x4b2a26 MOV %RBX,(%RDI) |
0x4b2a29 CALL 4caa60 <hypre_CSRMatrixInitialize> |
0x4b2a2e MOV (%R12),%RAX |
0x4b2a32 MOV 0x8(%RAX),%RCX |
0x4b2a36 MOV -0xf8(%RBP),%RDX |
0x4b2a3d MOV %RCX,(%RDX) |
0x4b2a40 MOV 0x30(%RAX),%RAX |
0x4b2a44 MOV -0xe0(%RBP),%RCX |
0x4b2a4b MOV %RAX,(%RCX) |
0x4b2a4e MOV -0x60(%RBP),%R12 |
0x4b2a52 MOV -0x50(%RBP),%RAX |
0x4b2a56 MOV (%RAX),%ESI |
0x4b2a58 MOV $0x7192d0,%EDI |
0x4b2a5d CALL 40fed0 <__kmpc_barrier@plt> |
0x4b2a62 MOV -0x130(%RBP),%RDX |
0x4b2a69 TEST %RDX,%RDX |
0x4b2a6c JLE 4b2a80 |
0x4b2a6e SAL $0x3,%RDX |
0x4b2a72 MOV -0x48(%RBP),%RDI |
0x4b2a76 MOV $0xff,%ESI |
0x4b2a7b CALL 4e0430 <__intel_avx_rep_memset> |
0x4b2a80 MOV -0x98(%RBP),%RCX |
0x4b2a87 CMPQ $0,(%RCX) |
0x4b2a8b MOV -0xd8(%RBP),%RSI |
0x4b2a92 JLE 4b2ab0 |
0x4b2a94 XOR %EAX,%EAX |
0x4b2a96 NOPW %CS:(%RAX,%RAX,1) |
(3459) 0x4b2aa0 MOVQ $-0x1,(%R15,%RAX,8) |
(3459) 0x4b2aa8 INC %RAX |
(3459) 0x4b2aab CMP (%RCX),%RAX |
(3459) 0x4b2aae JL 4b2aa0 |
0x4b2ab0 TEST %RSI,%RSI |
0x4b2ab3 MOV -0x68(%RBP),%R10 |
0x4b2ab7 MOV -0x90(%RBP),%RDX |
0x4b2abe MOV -0x30(%RBP),%RDI |
0x4b2ac2 JE 4b2ae3 |
0x4b2ac4 MOV -0xc8(%RBP),%RAX |
0x4b2acb MOV -0x8(%RAX,%RSI,8),%RAX |
0x4b2ad0 MOV -0xd0(%RBP),%RCX |
0x4b2ad7 MOV -0x8(%RCX,%RSI,8),%RCX |
0x4b2adc CMP %RDX,%RDI |
0x4b2adf JL 4b2b34 |
0x4b2ae1 JMP 4b2aec |
0x4b2ae3 XOR %EAX,%EAX |
0x4b2ae5 XOR %ECX,%ECX |
0x4b2ae7 CMP %RDX,%RDI |
0x4b2aea JL 4b2b34 |
0x4b2aec MOV -0x48(%RBP),%RDI |
0x4b2af0 VZEROUPPER |
0x4b2af3 CALL 4d5720 <hypre_Free> |
0x4b2af8 MOV %R15,%RDI |
0x4b2afb ADD $0x118,%RSP |
0x4b2b02 POP %RBX |
0x4b2b03 POP %R12 |
0x4b2b05 POP %R13 |
0x4b2b07 POP %R14 |
0x4b2b09 POP %R15 |
0x4b2b0b POP %RBP |
0x4b2b0c JMP 4d5720 |
0x4b2b11 NOPW %CS:(%RAX,%RAX,1) |
(3450) 0x4b2b20 MOV -0x30(%RBP),%RDI |
(3450) 0x4b2b24 MOV -0x90(%RBP),%RDX |
(3450) 0x4b2b2b CMP %RDX,%RDI |
(3450) 0x4b2b2e MOV -0x60(%RBP),%R12 |
(3450) 0x4b2b32 JGE 4b2aec |
(3450) 0x4b2b34 MOV %RDI,%RDX |
(3450) 0x4b2b37 MOV -0xb0(%RBP),%RSI |
(3450) 0x4b2b3e MOV %RAX,(%RSI,%RDI,8) |
(3450) 0x4b2b42 MOV %RCX,(%RBX,%RDI,8) |
(3450) 0x4b2b46 MOV (%R12,%RDI,8),%RSI |
(3450) 0x4b2b4a INC %RDI |
(3450) 0x4b2b4d MOV %RDI,-0x30(%RBP) |
(3450) 0x4b2b51 CMP 0x8(%R12,%RDX,8),%RSI |
(3450) 0x4b2b56 JGE 4b2bc0 |
(3450) 0x4b2b58 MOV -0xf0(%RBP),%RDI |
(3450) 0x4b2b5f MOV (%RDI),%RDI |
(3450) 0x4b2b62 MOV -0xa8(%RBP),%R8 |
(3450) 0x4b2b69 MOV (%R8),%R8 |
(3450) 0x4b2b6c MOV -0x70(%RBP),%R12 |
(3450) 0x4b2b70 MOV -0x60(%RBP),%R13 |
(3450) 0x4b2b74 MOV -0x48(%RBP),%R14 |
(3450) 0x4b2b78 MOV -0x138(%RBP),%RBX |
(3450) 0x4b2b7f MOV -0x30(%RBP),%R11 |
(3450) 0x4b2b83 NOPW %CS:(%RAX,%RAX,1) |
(3458) 0x4b2b90 MOV (%R12,%RSI,8),%R9 |
(3458) 0x4b2b94 MOV %R9,(%RDI,%RAX,8) |
(3458) 0x4b2b98 VMOVQ (%RBX,%RSI,8),%XMM0 |
(3458) 0x4b2b9d VMOVQ %XMM0,(%R8,%RAX,8) |
(3458) 0x4b2ba3 MOV %RAX,(%R14,%R9,8) |
(3458) 0x4b2ba7 INC %RAX |
(3458) 0x4b2baa INC %RSI |
(3458) 0x4b2bad CMP (%R13,%R11,8),%RSI |
(3458) 0x4b2bb2 JL 4b2b90 |
(3450) 0x4b2bb4 JMP 4b2bc4 |
0x4b2bb6 NOPW %CS:(%RAX,%RAX,1) |
(3450) 0x4b2bc0 MOV -0x30(%RBP),%R11 |
(3450) 0x4b2bc4 MOV -0xa0(%RBP),%RDI |
(3450) 0x4b2bcb MOV (%RDI,%RDX,8),%RSI |
(3450) 0x4b2bcf CMP (%RDI,%R11,8),%RSI |
(3450) 0x4b2bd3 JGE 4b2c40 |
(3450) 0x4b2bd5 MOV -0xf8(%RBP),%RDI |
(3450) 0x4b2bdc MOV (%RDI),%RDI |
(3450) 0x4b2bdf MOV -0xe0(%RBP),%R8 |
(3450) 0x4b2be6 MOV (%R8),%R8 |
(3450) 0x4b2be9 MOV -0xa0(%RBP),%R12 |
(3450) 0x4b2bf0 MOV -0x40(%RBP),%R13 |
(3450) 0x4b2bf4 MOV -0x38(%RBP),%RBX |
(3450) 0x4b2bf8 MOV -0x30(%RBP),%R11 |
(3450) 0x4b2bfc MOV -0x140(%RBP),%R14 |
(3450) 0x4b2c03 NOPW %CS:(%RAX,%RAX,1) |
(3457) 0x4b2c10 MOV (%R10,%RSI,8),%R9 |
(3457) 0x4b2c14 MOV %R9,(%RDI,%RCX,8) |
(3457) 0x4b2c18 VMOVQ (%R14,%RSI,8),%XMM0 |
(3457) 0x4b2c1e VMOVQ %XMM0,(%R8,%RCX,8) |
(3457) 0x4b2c24 MOV %RCX,(%R15,%R9,8) |
(3457) 0x4b2c28 INC %RCX |
(3457) 0x4b2c2b INC %RSI |
(3457) 0x4b2c2e CMP (%R12,%R11,8),%RSI |
(3457) 0x4b2c32 JL 4b2c10 |
(3450) 0x4b2c34 JMP 4b2c48 |
0x4b2c36 NOPW %CS:(%RAX,%RAX,1) |
(3450) 0x4b2c40 MOV -0x40(%RBP),%R13 |
(3450) 0x4b2c44 MOV -0x38(%RBP),%RBX |
(3450) 0x4b2c48 MOV -0x58(%RBP),%R14 |
(3450) 0x4b2c4c TEST %R14,%R14 |
(3450) 0x4b2c4f JLE 4b2b20 |
(3450) 0x4b2c55 VPBROADCASTQ %RDX,%YMM0 |
(3450) 0x4b2c5b XOR %ESI,%ESI |
(3450) 0x4b2c5d MOV -0x88(%RBP),%R12 |
(3450) 0x4b2c64 MOV -0x30(%RBP),%RDI |
(3450) 0x4b2c68 JMP 4b2c80 |
0x4b2c6a NOPW (%RAX,%RAX,1) |
(3451) 0x4b2c70 MOV -0x68(%RBP),%R10 |
(3451) 0x4b2c74 INC %RSI |
(3451) 0x4b2c77 CMP %R14,%RSI |
(3451) 0x4b2c7a JE 4b2b24 |
(3451) 0x4b2c80 MOV (%R13,%RSI,8),%R9 |
(3451) 0x4b2c85 MOV 0x8(%R13,%RSI,8),%R8 |
(3451) 0x4b2c8a MOV %R8,%R10 |
(3451) 0x4b2c8d SUB %R9,%R10 |
(3451) 0x4b2c90 JLE 4b2c70 |
(3451) 0x4b2c92 MOV -0x80(%RBP),%RDI |
(3451) 0x4b2c96 LEA (%RDI,%R9,8),%EDI |
(3451) 0x4b2c9a AND $0x7f,%EDI |
(3451) 0x4b2c9d MOV $0x80,%EBX |
(3451) 0x4b2ca2 SUB %EDI,%EBX |
(3451) 0x4b2ca4 SHR $0x3,%EBX |
(3451) 0x4b2ca7 CMP %RBX,%R10 |
(3451) 0x4b2caa MOV %RBX,%R11 |
(3451) 0x4b2cad CMOVB %R10,%R11 |
(3451) 0x4b2cb1 TEST %R11,%R11 |
(3451) 0x4b2cb4 JE 4b2cd7 |
(3451) 0x4b2cb6 MOV %R9,%RDI |
(3451) 0x4b2cb9 MOV %R11,%R14 |
(3451) 0x4b2cbc NOPL (%RAX) |
(3456) 0x4b2cc0 MOV -0x80(%RBP),%R13 |
(3456) 0x4b2cc4 CMP %RDX,(%R13,%RDI,8) |
(3456) 0x4b2cc9 JE 4b2da3 |
(3456) 0x4b2ccf INC %RDI |
(3456) 0x4b2cd2 DEC %R14 |
(3456) 0x4b2cd5 JNE 4b2cc0 |
(3451) 0x4b2cd7 CMP %RBX,%R10 |
(3451) 0x4b2cda JBE 4b2d8a |
(3451) 0x4b2ce0 SUB %R11,%R10 |
(3451) 0x4b2ce3 MOV %R10,%RBX |
(3451) 0x4b2ce6 AND $-0x10,%RBX |
(3451) 0x4b2cea JE 4b2d3a |
(3451) 0x4b2cec LEA -0x1(%RBX),%R13 |
(3451) 0x4b2cf0 LEA (%R9,%R11,1),%RDI |
(3451) 0x4b2cf4 MOV -0x80(%RBP),%R14 |
(3451) 0x4b2cf8 LEA (%R14,%RDI,8),%R14 |
(3451) 0x4b2cfc XOR %R12D,%R12D |
(3451) 0x4b2cff NOP |
(3455) 0x4b2d00 VPCMPEQQ 0x20(%R14,%R12,8),%YMM0,%K0 |
(3455) 0x4b2d08 VPCMPEQQ (%R14,%R12,8),%YMM0,%K1 |
(3455) 0x4b2d0f VPCMPEQQ 0x60(%R14,%R12,8),%YMM0,%K2 |
(3455) 0x4b2d17 VPCMPEQQ 0x40(%R14,%R12,8),%YMM0,%K3 |
(3455) 0x4b2d1f KORB %K0,%K1,%K4 |
(3455) 0x4b2d23 KORB %K2,%K3,%K5 |
(3455) 0x4b2d27 KORTESTB %K5,%K4 |
(3455) 0x4b2d2b JNE 4b2dc8 |
(3455) 0x4b2d31 ADD $0x10,%R12 |
(3455) 0x4b2d35 CMP %R13,%R12 |
(3455) 0x4b2d38 JBE 4b2d00 |
(3451) 0x4b2d3a CMP %R10,%RBX |
(3451) 0x4b2d3d JAE 4b2dac |
(3451) 0x4b2d3f ADD %R11,%R9 |
(3451) 0x4b2d42 ADD %RBX,%R9 |
(3451) 0x4b2d45 MOV %R9,%RDI |
(3451) 0x4b2d48 MOV -0x68(%RBP),%R10 |
(3451) 0x4b2d4c MOV -0xe8(%RBP),%R13 |
(3451) 0x4b2d53 MOV -0x88(%RBP),%R12 |
(3451) 0x4b2d5a MOV -0x30(%RBP),%R11 |
(3451) 0x4b2d5e XCHG %AX,%AX |
(3454) 0x4b2d60 MOV -0x80(%RBP),%R9 |
(3454) 0x4b2d64 CMP %RDX,(%R9,%RDI,8) |
(3454) 0x4b2d68 JE 4b2dfd |
(3454) 0x4b2d6e INC %RDI |
(3454) 0x4b2d71 CMP %RDI,%R8 |
(3454) 0x4b2d74 JNE 4b2d60 |
(3451) 0x4b2d76 MOV -0x58(%RBP),%R14 |
(3451) 0x4b2d7a MOV -0x40(%RBP),%R13 |
(3451) 0x4b2d7e MOV -0x38(%RBP),%RBX |
(3451) 0x4b2d82 MOV %R11,%RDI |
(3451) 0x4b2d85 JMP 4b2c74 |
(3451) 0x4b2d8a MOV -0x40(%RBP),%R13 |
(3451) 0x4b2d8e MOV -0x38(%RBP),%RBX |
(3451) 0x4b2d92 MOV -0x68(%RBP),%R10 |
(3451) 0x4b2d96 MOV -0x30(%RBP),%RDI |
(3451) 0x4b2d9a MOV -0x58(%RBP),%R14 |
(3451) 0x4b2d9e JMP 4b2c74 |
(3451) 0x4b2da3 MOV -0xe8(%RBP),%R13 |
(3451) 0x4b2daa JMP 4b2dfd |
(3451) 0x4b2dac MOV -0x58(%RBP),%R14 |
(3451) 0x4b2db0 MOV -0x40(%RBP),%R13 |
(3451) 0x4b2db4 MOV -0x38(%RBP),%RBX |
(3451) 0x4b2db8 MOV -0x68(%RBP),%R10 |
(3451) 0x4b2dbc MOV -0x88(%RBP),%R12 |
(3451) 0x4b2dc3 JMP 4b2f6b |
(3451) 0x4b2dc8 KSHIFTLB $0x4,%K0,%K0 |
(3451) 0x4b2dce KORB %K0,%K1,%K0 |
(3451) 0x4b2dd2 KSHIFTLB $0x4,%K2,%K1 |
(3451) 0x4b2dd8 KORB %K1,%K3,%K1 |
(3451) 0x4b2ddc KUNPCKBW %K0,%K1,%K0 |
(3451) 0x4b2de0 KMOVD %K0,%R8D |
(3451) 0x4b2de4 TZCNT %R8D,%R8D |
(3451) 0x4b2de9 ADD %R12,%RDI |
(3451) 0x4b2dec ADD %R8,%RDI |
(3451) 0x4b2def MOV -0xe8(%RBP),%R13 |
(3451) 0x4b2df6 MOV -0x88(%RBP),%R12 |
(3451) 0x4b2dfd MOV -0x48(%RBP),%R11 |
(3451) 0x4b2e01 MOV -0xb8(%RBP),%R9 |
(3451) 0x4b2e08 MOV (%R9,%RDI,8),%R8 |
(3451) 0x4b2e0c MOV 0x8(%R9,%RDI,8),%R9 |
(3451) 0x4b2e11 JMP 4b2e50 |
0x4b2e13 NOPW %CS:(%RAX,%RAX,1) |
(3453) 0x4b2e20 MOV -0xa8(%RBP),%R10 |
(3453) 0x4b2e27 MOV (%R10),%R10 |
(3453) 0x4b2e2a VMOVSD (%R10,%R11,8),%XMM1 |
(3453) 0x4b2e30 MOV -0x108(%RBP),%R12 |
(3453) 0x4b2e37 VADDSD (%R12,%R8,8),%XMM1,%XMM1 |
(3453) 0x4b2e3d MOV -0x88(%RBP),%R12 |
(3453) 0x4b2e44 VMOVSD %XMM1,(%R10,%R11,8) |
(3453) 0x4b2e4a MOV %R14,%R11 |
(3453) 0x4b2e4d INC %R8 |
(3453) 0x4b2e50 CMP %R9,%R8 |
(3453) 0x4b2e53 JGE 4b2eb0 |
(3453) 0x4b2e55 MOV (%R12,%R8,8),%R10 |
(3453) 0x4b2e59 MOV %R11,%R14 |
(3453) 0x4b2e5c MOV (%R11,%R10,8),%R11 |
(3453) 0x4b2e60 MOV -0xb0(%RBP),%RBX |
(3453) 0x4b2e67 CMP (%RBX,%RDX,8),%R11 |
(3453) 0x4b2e6b JGE 4b2e20 |
(3453) 0x4b2e6d MOV -0xf0(%RBP),%R9 |
(3453) 0x4b2e74 MOV (%R9),%R9 |
(3453) 0x4b2e77 MOV %R10,(%R9,%RAX,8) |
(3453) 0x4b2e7b MOV -0x108(%RBP),%R9 |
(3453) 0x4b2e82 VMOVSD (%R9,%R8,8),%XMM1 |
(3453) 0x4b2e88 MOV -0xa8(%RBP),%R9 |
(3453) 0x4b2e8f MOV (%R9),%R9 |
(3453) 0x4b2e92 VMOVSD %XMM1,(%R9,%RAX,8) |
(3453) 0x4b2e98 MOV %RAX,(%R14,%R10,8) |
(3453) 0x4b2e9c INC %RAX |
(3453) 0x4b2e9f MOV -0xb8(%RBP),%R9 |
(3453) 0x4b2ea6 MOV 0x8(%R9,%RDI,8),%R9 |
(3453) 0x4b2eab JMP 4b2e4a |
0x4b2ead NOPL (%RAX) |
(3451) 0x4b2eb0 MOV -0xc0(%RBP),%R9 |
(3451) 0x4b2eb7 MOV (%R9,%RDI,8),%R8 |
(3451) 0x4b2ebb MOV 0x8(%R9,%RDI,8),%R9 |
(3451) 0x4b2ec0 CMP %R9,%R8 |
(3451) 0x4b2ec3 JGE 4b2f46 |
(3451) 0x4b2ec9 MOV -0x38(%RBP),%RBX |
(3451) 0x4b2ecd MOV -0xe0(%RBP),%R14 |
(3451) 0x4b2ed4 JMP 4b2efe |
0x4b2ed6 NOPW %CS:(%RAX,%RAX,1) |
(3452) 0x4b2ee0 MOV (%R14),%R10 |
(3452) 0x4b2ee3 VMOVSD (%R10,%R11,8),%XMM1 |
(3452) 0x4b2ee9 VADDSD (%R13,%R8,8),%XMM1,%XMM1 |
(3452) 0x4b2ef0 VMOVSD %XMM1,(%R10,%R11,8) |
(3452) 0x4b2ef6 INC %R8 |
(3452) 0x4b2ef9 CMP %R9,%R8 |
(3452) 0x4b2efc JGE 4b2f5f |
(3452) 0x4b2efe MOV -0x110(%RBP),%R10 |
(3452) 0x4b2f05 MOV (%R10,%R8,8),%R10 |
(3452) 0x4b2f09 MOV (%R15,%R10,8),%R11 |
(3452) 0x4b2f0d CMP (%RBX,%RDX,8),%R11 |
(3452) 0x4b2f11 JGE 4b2ee0 |
(3452) 0x4b2f13 MOV -0xf8(%RBP),%R9 |
(3452) 0x4b2f1a MOV (%R9),%R9 |
(3452) 0x4b2f1d MOV %R10,(%R9,%RCX,8) |
(3452) 0x4b2f21 VMOVSD (%R13,%R8,8),%XMM1 |
(3452) 0x4b2f28 MOV (%R14),%R9 |
(3452) 0x4b2f2b VMOVSD %XMM1,(%R9,%RCX,8) |
(3452) 0x4b2f31 MOV %RCX,(%R15,%R10,8) |
(3452) 0x4b2f35 INC %RCX |
(3452) 0x4b2f38 MOV -0xc0(%RBP),%R9 |
(3452) 0x4b2f3f MOV 0x8(%R9,%RDI,8),%R9 |
(3452) 0x4b2f44 JMP 4b2ef6 |
(3451) 0x4b2f46 MOV -0x68(%RBP),%R10 |
(3451) 0x4b2f4a MOV -0x30(%RBP),%RDI |
(3451) 0x4b2f4e MOV -0x58(%RBP),%R14 |
(3451) 0x4b2f52 MOV -0x40(%RBP),%R13 |
(3451) 0x4b2f56 MOV -0x38(%RBP),%RBX |
(3451) 0x4b2f5a JMP 4b2c74 |
(3451) 0x4b2f5f MOV -0x68(%RBP),%R10 |
(3451) 0x4b2f63 MOV -0x58(%RBP),%R14 |
(3451) 0x4b2f67 MOV -0x40(%RBP),%R13 |
(3451) 0x4b2f6b MOV -0x30(%RBP),%RDI |
(3451) 0x4b2f6f JMP 4b2c74 |
0x4b2f74 NOPW %CS:(%RAX,%RAX,1) |
0x4b2f7e XCHG %AX,%AX |
Path / |
Source file and lines | par_csr_matop.c:3454-3640 |
Module | exec |
nb instructions | 289 |
nb uops | 311 |
loop length | 1332 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 0 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 59 |
micro-operation queue | 51.83 cycles |
front end | 51.83 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 12.80 | 12.80 | 32.00 | 32.00 | 32.50 | 12.80 | 12.80 | 32.50 | 32.50 | 32.50 | 12.80 | 32.00 |
cycles | 12.80 | 16.40 | 32.00 | 32.00 | 32.50 | 12.80 | 12.80 | 32.50 | 32.50 | 32.50 | 12.80 | 32.00 |
Cycles executing div or sqrt instructions | 16.00 |
FE+BE cycles | 49.01-49.06 |
Stall cycles | 0.00 |
Front-end | 51.83 |
Dispatch | 32.50 |
DIV/SQRT | 16.00 |
Overall L1 | 51.83 |
all | 2% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 7% |
all | 12% |
load | 12% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 9% |
other | 12% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
SUB $0x118,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R9,-0xa8(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R8,-0x100(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RCX,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RDX,%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RDI,-0x50(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0xc8(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,-0xd0(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0xc0(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,-0xc8(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0xb8(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,-0x98(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0xb0(%RBP),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0xa8(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,-0x80(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0xa0(%RBP),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x98(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,-0xf8(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x90(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,-0x38(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x88(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,-0xe0(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x80(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,-0x140(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x78(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,-0x68(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x70(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,-0xa0(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x68(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,-0x138(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x60(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,-0x70(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x58(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,-0x60(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x50(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,-0xe8(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x48(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,-0x110(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x40(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,-0xc0(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x38(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,-0x108(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x30(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,-0x88(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x28(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,-0xb8(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x20(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,-0x128(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x18(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,-0xf0(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x10(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,-0xb0(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CALL 4d7300 <hypre_NumActiveThreads> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV %RAX,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %R14,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
OR %RCX,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
SHR $0x20,%RAX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
MOV %R14,-0x120(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RCX,-0x118(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JE 4b23a7 <hypre_ParTMatmul.extracted+0x167> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R14,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CQTO | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
IDIV %RCX | 5 | 0 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 11-16 | 10 |
MOV %RDX,%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RAX,%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
JMP 4b23b4 <hypre_ParTMatmul.extracted+0x174> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
MOV %R14D,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
DIV %ECX | 4 | 0 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 11-16 | 6 |
MOV %EDX,%R15D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %EAX,%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL 4d7310 <hypre_GetThreadNum> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
LEA 0x1(%RAX),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP %R15,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RAX,-0xd8(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RBX,-0x58(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R12,-0x40(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JGE 4b23e2 <hypre_ParTMatmul.extracted+0x1a2> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
INC %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
IMUL %R14,%RAX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV %RAX,-0x30(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
IMUL %R14,%RCX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
JMP 4b23f4 <hypre_ParTMatmul.extracted+0x1b4> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
IMUL %R14,%RAX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
ADD %R15,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RAX,-0x30(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
IMUL %R14,%RCX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
ADD %R15,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RCX,-0x90(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV $0x8,%ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R13,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL 4d5650 <hypre_CAlloc> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV %RAX,%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV -0x98(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV $0x8,%ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CALL 4d5650 <hypre_CAlloc> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV %RAX,%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
TEST %R13,%R13 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
MOV -0x38(%RBP),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JLE 4b2440 <hypre_ParTMatmul.extracted+0x200> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
LEA (,%R13,8),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R14,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV $0xff,%ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CALL 4e0430 <__intel_avx_rep_memset> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV %R13,-0x130(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R14,-0x48(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV -0x98(%RBP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CMPQ $0,(%RDX) | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
MOV -0x90(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x30(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JLE 4b2480 <hypre_ParTMatmul.extracted+0x240> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP %RCX,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV -0x58(%RBP),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x40(%RBP),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x70(%RBP),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x60(%RBP),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x48(%RBP),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x50(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JGE 4b27fd <hypre_ParTMatmul.extracted+0x5bd> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV -0x30(%RBP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
XOR %ECX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 4b24e1 <hypre_ParTMatmul.extracted+0x2a1> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XCHG %AX,%AX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XCHG %AX,%AX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV (%RSI),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV $0x7192b0,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
CALL 40fed0 <__kmpc_barrier@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV -0x50(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CMPQ $0,-0xd8(%RBP) | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
JNE 4b2a56 <hypre_ParTMatmul.extracted+0x816> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RBX,%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV -0x118(%RBP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CMP $0x1,%RDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV -0xd0(%RBP),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0xc8(%RBP),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JLE 4b298c <hypre_ParTMatmul.extracted+0x74c> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
LEA -0x1(%RDI),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA (%RBX,%RAX,8),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP %R11,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JB 4b28b1 <hypre_ParTMatmul.extracted+0x671> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
LEA (%R11,%RAX,8),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP %RBX,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JB 4b28b1 <hypre_ParTMatmul.extracted+0x671> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
LEA (%R11,%RDI,8),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP %RBX,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JBE 4b295e <hypre_ParTMatmul.extracted+0x71e> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
LEA (%RBX,%RDI,8),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP %R11,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JBE 4b295e <hypre_ParTMatmul.extracted+0x71e> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV $0x1,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 4b298c <hypre_ParTMatmul.extracted+0x74c> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
MOV %RDI,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CMP $0x5,%RDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JB 4b291d <hypre_ParTMatmul.extracted+0x6dd> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RAX,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SHR $0x2,%RCX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
MOV (%RBX),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%R11),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV $0x20,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RAX,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
AND $-0x4,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
CMP %RAX,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R12,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
JAE 4b298c <hypre_ParTMatmul.extracted+0x74c> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV (%RBX,%RCX,8),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%R11,%RCX,8),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
INC %RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 4b298c <hypre_ParTMatmul.extracted+0x74c> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
MOV (%RBX),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%R11),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV $0x1,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV -0x8(%RBX,%RDI,8),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x8(%R11,%RDI,8),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,-0x78(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV -0x120(%RBP),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0xb0(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RDX,(%RAX,%R12,8) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RCX,(%R10,%R12,8) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R12,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %R12,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL 4ca980 <hypre_CSRMatrixCreate> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV -0x100(%RBP),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,(%RBX) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV -0x98(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R12,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV -0x78(%RBP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CALL 4ca980 <hypre_CSRMatrixCreate> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV -0x128(%RBP),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,(%R12) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV (%RBX),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0xb0(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,(%RDI) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV -0x38(%RBP),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CALL 4caa60 <hypre_CSRMatrixInitialize> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV -0x100(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RAX),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0xf0(%RBP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,(%RDX) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x30(%RAX),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0xa8(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,(%RCX) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV (%R12),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RBX,(%RDI) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CALL 4caa60 <hypre_CSRMatrixInitialize> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV (%R12),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RAX),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0xf8(%RBP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,(%RDX) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x30(%RAX),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0xe0(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,(%RCX) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV -0x60(%RBP),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x50(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV $0x7192d0,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CALL 40fed0 <__kmpc_barrier@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV -0x130(%RBP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
TEST %RDX,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JLE 4b2a80 <hypre_ParTMatmul.extracted+0x840> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
SAL $0x3,%RDX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
MOV -0x48(%RBP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV $0xff,%ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CALL 4e0430 <__intel_avx_rep_memset> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV -0x98(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CMPQ $0,(%RCX) | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
MOV -0xd8(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JLE 4b2ab0 <hypre_ParTMatmul.extracted+0x870> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
TEST %RSI,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
MOV -0x68(%RBP),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x90(%RBP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x30(%RBP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JE 4b2ae3 <hypre_ParTMatmul.extracted+0x8a3> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0xc8(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x8(%RAX,%RSI,8),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0xd0(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x8(%RCX,%RSI,8),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CMP %RDX,%RDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JL 4b2b34 <hypre_ParTMatmul.extracted+0x8f4> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
JMP 4b2aec <hypre_ParTMatmul.extracted+0x8ac> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %ECX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP %RDX,%RDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JL 4b2b34 <hypre_ParTMatmul.extracted+0x8f4> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0x48(%RBP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
CALL 4d5720 <hypre_Free> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV %R15,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
ADD $0x118,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
JMP 4d5720 <hypre_Free> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XCHG %AX,%AX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Source file and lines | par_csr_matop.c:3454-3640 |
Module | exec |
nb instructions | 289 |
nb uops | 311 |
loop length | 1332 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 0 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 59 |
micro-operation queue | 51.83 cycles |
front end | 51.83 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 12.80 | 12.80 | 32.00 | 32.00 | 32.50 | 12.80 | 12.80 | 32.50 | 32.50 | 32.50 | 12.80 | 32.00 |
cycles | 12.80 | 16.40 | 32.00 | 32.00 | 32.50 | 12.80 | 12.80 | 32.50 | 32.50 | 32.50 | 12.80 | 32.00 |
Cycles executing div or sqrt instructions | 16.00 |
FE+BE cycles | 49.01-49.06 |
Stall cycles | 0.00 |
Front-end | 51.83 |
Dispatch | 32.50 |
DIV/SQRT | 16.00 |
Overall L1 | 51.83 |
all | 2% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 7% |
all | 12% |
load | 12% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 9% |
other | 12% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
SUB $0x118,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R9,-0xa8(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R8,-0x100(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RCX,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RDX,%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RDI,-0x50(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0xc8(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,-0xd0(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0xc0(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,-0xc8(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0xb8(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,-0x98(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0xb0(%RBP),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0xa8(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,-0x80(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0xa0(%RBP),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x98(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,-0xf8(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x90(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,-0x38(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x88(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,-0xe0(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x80(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,-0x140(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x78(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,-0x68(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x70(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,-0xa0(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x68(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,-0x138(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x60(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,-0x70(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x58(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,-0x60(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x50(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,-0xe8(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x48(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,-0x110(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x40(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,-0xc0(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x38(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,-0x108(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x30(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,-0x88(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x28(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,-0xb8(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x20(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,-0x128(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x18(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,-0xf0(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x10(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,-0xb0(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CALL 4d7300 <hypre_NumActiveThreads> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV %RAX,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %R14,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
OR %RCX,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
SHR $0x20,%RAX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
MOV %R14,-0x120(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RCX,-0x118(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JE 4b23a7 <hypre_ParTMatmul.extracted+0x167> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R14,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CQTO | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
IDIV %RCX | 5 | 0 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 11-16 | 10 |
MOV %RDX,%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RAX,%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
JMP 4b23b4 <hypre_ParTMatmul.extracted+0x174> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
MOV %R14D,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
DIV %ECX | 4 | 0 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 11-16 | 6 |
MOV %EDX,%R15D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %EAX,%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL 4d7310 <hypre_GetThreadNum> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
LEA 0x1(%RAX),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP %R15,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RAX,-0xd8(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RBX,-0x58(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R12,-0x40(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JGE 4b23e2 <hypre_ParTMatmul.extracted+0x1a2> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
INC %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
IMUL %R14,%RAX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV %RAX,-0x30(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
IMUL %R14,%RCX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
JMP 4b23f4 <hypre_ParTMatmul.extracted+0x1b4> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
IMUL %R14,%RAX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
ADD %R15,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RAX,-0x30(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
IMUL %R14,%RCX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
ADD %R15,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RCX,-0x90(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV $0x8,%ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R13,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL 4d5650 <hypre_CAlloc> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV %RAX,%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV -0x98(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV $0x8,%ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CALL 4d5650 <hypre_CAlloc> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV %RAX,%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
TEST %R13,%R13 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
MOV -0x38(%RBP),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JLE 4b2440 <hypre_ParTMatmul.extracted+0x200> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
LEA (,%R13,8),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R14,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV $0xff,%ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CALL 4e0430 <__intel_avx_rep_memset> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV %R13,-0x130(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R14,-0x48(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV -0x98(%RBP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CMPQ $0,(%RDX) | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
MOV -0x90(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x30(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JLE 4b2480 <hypre_ParTMatmul.extracted+0x240> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP %RCX,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV -0x58(%RBP),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x40(%RBP),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x70(%RBP),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x60(%RBP),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x48(%RBP),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x50(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JGE 4b27fd <hypre_ParTMatmul.extracted+0x5bd> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV -0x30(%RBP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
XOR %ECX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 4b24e1 <hypre_ParTMatmul.extracted+0x2a1> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XCHG %AX,%AX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XCHG %AX,%AX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV (%RSI),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV $0x7192b0,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
CALL 40fed0 <__kmpc_barrier@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV -0x50(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CMPQ $0,-0xd8(%RBP) | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
JNE 4b2a56 <hypre_ParTMatmul.extracted+0x816> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RBX,%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV -0x118(%RBP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CMP $0x1,%RDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV -0xd0(%RBP),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0xc8(%RBP),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JLE 4b298c <hypre_ParTMatmul.extracted+0x74c> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
LEA -0x1(%RDI),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA (%RBX,%RAX,8),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP %R11,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JB 4b28b1 <hypre_ParTMatmul.extracted+0x671> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
LEA (%R11,%RAX,8),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP %RBX,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JB 4b28b1 <hypre_ParTMatmul.extracted+0x671> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
LEA (%R11,%RDI,8),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP %RBX,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JBE 4b295e <hypre_ParTMatmul.extracted+0x71e> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
LEA (%RBX,%RDI,8),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP %R11,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JBE 4b295e <hypre_ParTMatmul.extracted+0x71e> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV $0x1,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 4b298c <hypre_ParTMatmul.extracted+0x74c> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
MOV %RDI,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CMP $0x5,%RDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JB 4b291d <hypre_ParTMatmul.extracted+0x6dd> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RAX,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SHR $0x2,%RCX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
MOV (%RBX),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%R11),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV $0x20,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RAX,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
AND $-0x4,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
CMP %RAX,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R12,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
JAE 4b298c <hypre_ParTMatmul.extracted+0x74c> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV (%RBX,%RCX,8),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%R11,%RCX,8),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
INC %RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 4b298c <hypre_ParTMatmul.extracted+0x74c> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
MOV (%RBX),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%R11),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV $0x1,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV -0x8(%RBX,%RDI,8),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x8(%R11,%RDI,8),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,-0x78(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV -0x120(%RBP),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0xb0(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RDX,(%RAX,%R12,8) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RCX,(%R10,%R12,8) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R12,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %R12,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL 4ca980 <hypre_CSRMatrixCreate> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV -0x100(%RBP),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,(%RBX) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV -0x98(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R12,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV -0x78(%RBP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CALL 4ca980 <hypre_CSRMatrixCreate> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV -0x128(%RBP),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,(%R12) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV (%RBX),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0xb0(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,(%RDI) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV -0x38(%RBP),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CALL 4caa60 <hypre_CSRMatrixInitialize> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV -0x100(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RAX),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0xf0(%RBP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,(%RDX) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x30(%RAX),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0xa8(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,(%RCX) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV (%R12),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RBX,(%RDI) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CALL 4caa60 <hypre_CSRMatrixInitialize> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV (%R12),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RAX),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0xf8(%RBP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,(%RDX) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x30(%RAX),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0xe0(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,(%RCX) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV -0x60(%RBP),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x50(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV $0x7192d0,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CALL 40fed0 <__kmpc_barrier@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV -0x130(%RBP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
TEST %RDX,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JLE 4b2a80 <hypre_ParTMatmul.extracted+0x840> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
SAL $0x3,%RDX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
MOV -0x48(%RBP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV $0xff,%ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CALL 4e0430 <__intel_avx_rep_memset> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV -0x98(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CMPQ $0,(%RCX) | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
MOV -0xd8(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JLE 4b2ab0 <hypre_ParTMatmul.extracted+0x870> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
TEST %RSI,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
MOV -0x68(%RBP),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x90(%RBP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x30(%RBP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JE 4b2ae3 <hypre_ParTMatmul.extracted+0x8a3> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0xc8(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x8(%RAX,%RSI,8),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0xd0(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x8(%RCX,%RSI,8),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CMP %RDX,%RDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JL 4b2b34 <hypre_ParTMatmul.extracted+0x8f4> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
JMP 4b2aec <hypre_ParTMatmul.extracted+0x8ac> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %ECX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP %RDX,%RDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JL 4b2b34 <hypre_ParTMatmul.extracted+0x8f4> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0x48(%RBP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
CALL 4d5720 <hypre_Free> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV %R15,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
ADD $0x118,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
JMP 4d5720 <hypre_Free> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XCHG %AX,%AX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼hypre_ParTMatmul.extracted– | 0.09 | 0.01 |
▼Loop 3450 - par_csr_matop.c:3467-3640 - exec– | 0 | 0 |
○Loop 3458 - par_csr_matop.c:3590-3596 - exec | 0.01 | 0.01 |
▼Loop 3451 - par_csr_matop.c:3467-3634 - exec– | 0 | 0 |
○Loop 3455 - par_csr_matop.c:3505-3608 - exec | 0.03 | 0.01 |
○Loop 3454 - par_csr_matop.c:3505-3608 - exec | 0 | 0 |
○Loop 3452 - par_csr_matop.c:3623-3634 - exec | 0 | 0 |
○Loop 3456 - par_csr_matop.c:3505-3608 - exec | 0 | 0 |
○Loop 3453 - par_csr_matop.c:3610-3621 - exec | 0 | 0 |
○Loop 3457 - par_csr_matop.c:3598-3604 - exec | 0 | 0 |
○Loop 3462 - par_csr_matop.c:3537-3540 - exec | 0 | 0 |
○Loop 3473 - par_csr_matop.c:3484-3485 - exec | 0 | 0 |
○Loop 3463 - par_csr_matop.c:3537-3540 - exec | 0 | 0 |
○Loop 3459 - par_csr_matop.c:3571-3572 - exec | 0 | 0 |
○Loop 3461 - par_csr_matop.c:3537-3540 - exec | 0 | 0 |
○Loop 3460 - par_csr_matop.c:3537-3540 - exec | 0 | 0 |
▼Loop 3464 - par_csr_matop.c:3467-3528 - exec– | 0 | 0 |
○Loop 3472 - par_csr_matop.c:3491-3495 - exec | 0.01 | 0 |
○Loop 3471 - par_csr_matop.c:3497-3501 - exec | 0 | 0 |
▼Loop 3465 - par_csr_matop.c:3467-3522 - exec– | 0 | 0 |
○Loop 3469 - par_csr_matop.c:3504-3505 - exec | 0.04 | 0.01 |
○Loop 3470 - par_csr_matop.c:3467-3505 - exec | 0 | 0 |
○Loop 3468 - par_csr_matop.c:3504-3505 - exec | 0 | 0 |
○Loop 3466 - par_csr_matop.c:3516-3522 - exec | 0 | 0 |
○Loop 3467 - par_csr_matop.c:3507-3513 - exec | 0 | 0 |