Function: hypre_IJMatrixSetValuesOMPParCSR.extracted.28 | Module: exec | Source: IJMatrix_parcsr.c:3240-3484 [...] | Coverage: 0.64% |
---|
Function: hypre_IJMatrixSetValuesOMPParCSR.extracted.28 | Module: exec | Source: IJMatrix_parcsr.c:3240-3484 [...] | Coverage: 0.64% |
---|
/scratch_na/users/xoserete/qaas_runs/171-415-3661/intel/AMG/build/AMG/AMG/IJ_mv/IJMatrix_parcsr.c: 3240 - 3484 |
-------------------------------------------------------------------------------- |
3240: #pragma omp parallel |
[...] |
3256: num_threads = hypre_NumActiveThreads(); |
3257: my_thread_num = hypre_GetThreadNum(); |
3258: |
3259: len = nrows/num_threads; |
3260: rest = nrows - len*num_threads; |
3261: |
3262: if (my_thread_num < rest) |
3263: { |
3264: ns = my_thread_num*(len+1); |
3265: ne = (my_thread_num+1)*(len+1); |
3266: } |
3267: else |
3268: { |
3269: ns = my_thread_num*len+rest; |
3270: ne = (my_thread_num+1)*len+rest; |
3271: } |
3272: |
3273: value_start[my_thread_num] = 0; |
3274: for (ii=ns; ii < ne; ii++) |
3275: value_start[my_thread_num] += ncols[ii]; |
3276: |
3277: #ifdef HYPRE_USING_OPENMP |
3278: #pragma omp barrier |
3279: #endif |
3280: if (my_thread_num == 0) |
3281: { |
3282: for (i=0; i < max_num_threads; i++) |
3283: value_start[i+1] += value_start[i]; |
[...] |
3289: if (my_thread_num) |
3290: indx = value_start[my_thread_num-1]; |
3291: for (ii=ns; ii < ne; ii++) |
3292: { |
3293: row = rows[ii]; |
3294: n = ncols[ii]; |
3295: /* processor owns the row */ |
3296: if (row >= row_partitioning[pstart] && row < row_partitioning[pstart+1]) |
3297: { |
3298: row_local = row - row_partitioning[pstart]; |
3299: /* compute local row number */ |
3300: if (need_aux) |
3301: { |
3302: local_j = aux_j[row_local]; |
3303: local_data = aux_data[row_local]; |
3304: space = row_space[row_local]; |
3305: old_size = row_length[row_local]; |
3306: size = space - old_size; |
3307: if (size < n) |
3308: { |
3309: size = n - size; |
3310: tmp_j = hypre_CTAlloc(HYPRE_Int,size); |
3311: tmp_data = hypre_CTAlloc(HYPRE_Complex,size); |
3312: } |
3313: tmp_indx = 0; |
3314: not_found = 1; |
3315: size = old_size; |
3316: for (i=0; i < n; i++) |
3317: { |
3318: for (j=0; j < old_size; j++) |
3319: { |
3320: if (local_j[j] == cols[indx]) |
3321: { |
3322: local_data[j] = values[indx]; |
[...] |
3329: if (size < space) |
3330: { |
3331: local_j[size] = cols[indx]; |
3332: local_data[size++] = values[indx]; |
3333: } |
3334: else |
3335: { |
3336: tmp_j[tmp_indx] = cols[indx]; |
3337: tmp_data[tmp_indx++] = values[indx]; |
[...] |
3344: row_length[row_local] = size+tmp_indx; |
3345: |
3346: if (tmp_indx) |
3347: { |
3348: aux_j[row_local] = hypre_TReAlloc(aux_j[row_local],HYPRE_Int, |
3349: size+tmp_indx); |
3350: aux_data[row_local] = hypre_TReAlloc(aux_data[row_local], |
3351: HYPRE_Complex,size+tmp_indx); |
3352: row_space[row_local] = size+tmp_indx; |
3353: local_j = aux_j[row_local]; |
[...] |
3359: for (i=0; i < tmp_indx; i++) |
3360: { |
3361: local_j[cnt] = tmp_j[i]; |
3362: local_data[cnt++] = tmp_data[i]; |
3363: } |
3364: |
3365: if (tmp_j) |
3366: { |
3367: hypre_TFree(tmp_j); |
3368: hypre_TFree(tmp_data); |
[...] |
3376: offd_indx = hypre_AuxParCSRMatrixIndxOffd(aux_matrix)[row_local]; |
3377: diag_indx = hypre_AuxParCSRMatrixIndxDiag(aux_matrix)[row_local]; |
3378: cnt_diag = diag_indx; |
3379: cnt_offd = offd_indx; |
3380: diag_space = diag_i[row_local+1]; |
3381: offd_space = offd_i[row_local+1]; |
3382: not_found = 1; |
3383: for (i=0; i < n; i++) |
3384: { |
3385: if (cols[indx] < col_0 || cols[indx] > col_n) |
3386: /* insert into offd */ |
3387: { |
3388: for (j=offd_i[row_local]; j < offd_indx; j++) |
3389: { |
3390: if (offd_j[j] == cols[indx]) |
3391: { |
3392: offd_data[j] = values[indx]; |
[...] |
3399: if (cnt_offd < offd_space) |
3400: { |
3401: offd_j[cnt_offd] = cols[indx]; |
3402: offd_data[cnt_offd++] = values[indx]; |
3403: } |
3404: else |
3405: { |
3406: hypre_error(HYPRE_ERROR_GENERIC); |
3407: #ifdef HYPRE_USING_OPENMP |
3408: #pragma omp atomic |
3409: #endif |
3410: error_flag++; |
[...] |
3422: for (j=diag_i[row_local]; j < diag_indx; j++) |
3423: { |
3424: if (diag_j[j] == cols[indx]) |
3425: { |
3426: diag_data[j] = values[indx]; |
[...] |
3433: if (cnt_diag < diag_space) |
3434: { |
3435: diag_j[cnt_diag] = cols[indx]; |
3436: diag_data[cnt_diag++] = values[indx]; |
3437: } |
3438: else |
3439: { |
3440: hypre_error(HYPRE_ERROR_GENERIC); |
3441: #ifdef HYPRE_USING_OPENMP |
3442: #pragma omp atomic |
3443: #endif |
3444: error_flag++; |
3445: if (print_level) |
[...] |
3454: indx++; |
3455: } |
3456: |
3457: hypre_AuxParCSRMatrixIndxDiag(aux_matrix)[row_local] = cnt_diag; |
3458: hypre_AuxParCSRMatrixIndxOffd(aux_matrix)[row_local] = cnt_offd; |
[...] |
3466: indx += n; |
3467: if (aux_matrix) |
3468: { |
3469: col_indx = 0; |
3470: for (i=0; i < off_proc_i_indx; i=i+2) |
3471: { |
3472: row_len = off_proc_i[i+1]; |
3473: if (off_proc_i[i] == row) |
3474: { |
3475: for (j=0; j < n; j++) |
3476: { |
3477: cnt1 = col_indx; |
3478: for (k=0; k < row_len; k++) |
3479: { |
3480: if (off_proc_j[cnt1] == cols[j]) |
3481: { |
3482: off_proc_j[cnt1++] = -1; |
3483: /*cancel_indx++;*/ |
3484: offproc_cnt[my_thread_num]++; |
0x4c3c60 PUSH %RBP |
0x4c3c61 MOV %RSP,%RBP |
0x4c3c64 PUSH %R15 |
0x4c3c66 PUSH %R14 |
0x4c3c68 PUSH %R13 |
0x4c3c6a PUSH %R12 |
0x4c3c6c PUSH %RBX |
0x4c3c6d SUB $0x98,%RSP |
0x4c3c74 MOV %R9,%RBX |
0x4c3c77 MOV %R8,-0xb0(%RBP) |
0x4c3c7e MOV %RCX,-0x38(%RBP) |
0x4c3c82 MOV %RDX,%R15 |
0x4c3c85 MOV %RDI,%R12 |
0x4c3c88 CALL 4d6de0 <hypre_NumActiveThreads> |
0x4c3c8d MOV %RAX,%R13 |
0x4c3c90 CALL 4d6df0 <hypre_GetThreadNum> |
0x4c3c95 MOV %RAX,%R14 |
0x4c3c98 MOV %R13,%RAX |
0x4c3c9b OR %R15,%RAX |
0x4c3c9e SHR $0x20,%RAX |
0x4c3ca2 JE 4c3cae |
0x4c3ca4 MOV %R15,%RAX |
0x4c3ca7 CQTO |
0x4c3ca9 IDIV %R13 |
0x4c3cac JMP 4c3cb6 |
0x4c3cae MOV %R15D,%EAX |
0x4c3cb1 XOR %EDX,%EDX |
0x4c3cb3 DIV %R13D |
0x4c3cb6 MOV 0xb8(%RBP),%R15 |
0x4c3cbd LEA 0x1(%R14),%RCX |
0x4c3cc1 CMP %RDX,%R14 |
0x4c3cc4 MOV -0x38(%RBP),%R8 |
0x4c3cc8 MOV %R12,%R13 |
0x4c3ccb JGE 4c3cde |
0x4c3ccd LEA 0x1(%RAX),%RDI |
0x4c3cd1 MOV %RDI,%RSI |
0x4c3cd4 IMUL %R14,%RSI |
0x4c3cd8 IMUL %RCX,%RDI |
0x4c3cdc JMP 4c3cf2 |
0x4c3cde MOV %RAX,%RSI |
0x4c3ce1 IMUL %R14,%RSI |
0x4c3ce5 ADD %RDX,%RSI |
0x4c3ce8 IMUL %RAX,%RCX |
0x4c3cec ADD %RDX,%RCX |
0x4c3cef MOV %RCX,%RDI |
0x4c3cf2 MOVQ $0,(%R15,%R14,8) |
0x4c3cfa MOV %RSI,%R12 |
0x4c3cfd CMP %RDI,%RSI |
0x4c3d00 MOV %RDI,-0x30(%RBP) |
0x4c3d04 JGE 4c3ddf |
0x4c3d0a LEA (%R15,%R14,8),%RCX |
0x4c3d0e LEA -0x8(%R8,%RDI,8),%RDI |
0x4c3d13 CMP %RDX,%R14 |
0x4c3d16 CMOVL %R14,%RDX |
0x4c3d1a IMUL %R14,%RAX |
0x4c3d1e LEA (%RDX,%RAX,1),%RSI |
0x4c3d22 CMP %RCX,%RDI |
0x4c3d25 JB 4c3d54 |
0x4c3d27 LEA (%R8,%RSI,8),%RDI |
0x4c3d2b CMP %RDI,%RCX |
0x4c3d2e JB 4c3d54 |
0x4c3d30 XOR %EAX,%EAX |
0x4c3d32 MOV %R12,%RDX |
0x4c3d35 MOV -0x30(%RBP),%RSI |
0x4c3d39 NOPL (%RAX) |
(3826) 0x4c3d40 ADD (%R8,%RDX,8),%RAX |
(3826) 0x4c3d44 MOV %RAX,(%RCX) |
(3826) 0x4c3d47 INC %RDX |
(3826) 0x4c3d4a CMP %RDX,%RSI |
(3826) 0x4c3d4d JNE 4c3d40 |
0x4c3d4f JMP 4c3ddf |
0x4c3d54 MOV -0x30(%RBP),%R8 |
0x4c3d58 SUB %RSI,%R8 |
0x4c3d5b MOV %R8,%RSI |
0x4c3d5e AND $-0x4,%RSI |
0x4c3d62 JE 4c3db6 |
0x4c3d64 LEA -0x1(%RSI),%RDI |
0x4c3d68 MOV -0x38(%RBP),%R9 |
0x4c3d6c LEA (%R9,%R12,8),%R9 |
0x4c3d70 VPXOR %XMM0,%XMM0,%XMM0 |
0x4c3d74 XOR %R10D,%R10D |
0x4c3d77 NOPW (%RAX,%RAX,1) |
(3825) 0x4c3d80 VPADDQ (%R9,%R10,8),%YMM0,%YMM0 |
(3825) 0x4c3d86 ADD $0x4,%R10 |
(3825) 0x4c3d8a CMP %RDI,%R10 |
(3825) 0x4c3d8d JBE 4c3d80 |
0x4c3d8f VEXTRACTI128 $0x1,%YMM0,%XMM1 |
0x4c3d95 VPADDQ %XMM1,%XMM0,%XMM0 |
0x4c3d99 VPSHUFD $-0x12,%XMM0,%XMM1 |
0x4c3d9e VPADDQ %XMM1,%XMM0,%XMM0 |
0x4c3da2 VMOVQ %XMM0,%RDI |
0x4c3da7 CMP %RSI,%R8 |
0x4c3daa MOV -0x38(%RBP),%R8 |
0x4c3dae MOV -0x30(%RBP),%R9 |
0x4c3db2 JNE 4c3dc2 |
0x4c3db4 JMP 4c3ddc |
0x4c3db6 XOR %ESI,%ESI |
0x4c3db8 XOR %EDI,%EDI |
0x4c3dba MOV -0x38(%RBP),%R8 |
0x4c3dbe MOV -0x30(%RBP),%R9 |
0x4c3dc2 ADD %RDX,%RSI |
0x4c3dc5 ADD %RAX,%RSI |
0x4c3dc8 NOPL (%RAX,%RAX,1) |
(3824) 0x4c3dd0 ADD (%R8,%RSI,8),%RDI |
(3824) 0x4c3dd4 INC %RSI |
(3824) 0x4c3dd7 CMP %RSI,%R9 |
(3824) 0x4c3dda JNE 4c3dd0 |
0x4c3ddc MOV %RDI,(%RCX) |
0x4c3ddf MOV (%R13),%ESI |
0x4c3de3 MOV $0x719570,%EDI |
0x4c3de8 VZEROUPPER |
0x4c3deb CALL 40fef0 <__kmpc_barrier@plt> |
0x4c3df0 TEST %R14,%R14 |
0x4c3df3 JNE 4c3e94 |
0x4c3df9 MOV 0xc8(%RBP),%RAX |
0x4c3e00 TEST %RAX,%RAX |
0x4c3e03 JLE 4c3e94 |
0x4c3e09 CMP $0x8,%RAX |
0x4c3e0d JB 4c3e67 |
0x4c3e0f MOV %RAX,%RCX |
0x4c3e12 SHR $0x3,%RCX |
0x4c3e16 MOV (%R15),%RDX |
0x4c3e19 LEA 0x40(%R15),%RSI |
0x4c3e1d NOPL (%RAX) |
(3823) 0x4c3e20 ADD -0x38(%RSI),%RDX |
(3823) 0x4c3e24 MOV %RDX,-0x38(%RSI) |
(3823) 0x4c3e28 ADD -0x30(%RSI),%RDX |
(3823) 0x4c3e2c MOV %RDX,-0x30(%RSI) |
(3823) 0x4c3e30 ADD -0x28(%RSI),%RDX |
(3823) 0x4c3e34 MOV %RDX,-0x28(%RSI) |
(3823) 0x4c3e38 ADD -0x20(%RSI),%RDX |
(3823) 0x4c3e3c MOV %RDX,-0x20(%RSI) |
(3823) 0x4c3e40 ADD -0x18(%RSI),%RDX |
(3823) 0x4c3e44 MOV %RDX,-0x18(%RSI) |
(3823) 0x4c3e48 ADD -0x10(%RSI),%RDX |
(3823) 0x4c3e4c MOV %RDX,-0x10(%RSI) |
(3823) 0x4c3e50 ADD -0x8(%RSI),%RDX |
(3823) 0x4c3e54 MOV %RDX,-0x8(%RSI) |
(3823) 0x4c3e58 ADD (%RSI),%RDX |
(3823) 0x4c3e5b MOV %RDX,(%RSI) |
(3823) 0x4c3e5e ADD $0x40,%RSI |
(3823) 0x4c3e62 DEC %RCX |
(3823) 0x4c3e65 JNE 4c3e20 |
0x4c3e67 MOV %RAX,%RCX |
0x4c3e6a AND $-0x8,%RCX |
0x4c3e6e CMP %RAX,%RCX |
0x4c3e71 JE 4c3e94 |
0x4c3e73 MOV (%R15,%RCX,8),%RDX |
0x4c3e77 NOPW (%RAX,%RAX,1) |
(3822) 0x4c3e80 LEA (%R15,%RCX,8),%RSI |
(3822) 0x4c3e84 INC %RCX |
(3822) 0x4c3e87 ADD 0x8(%RSI),%RDX |
(3822) 0x4c3e8b MOV %RDX,0x8(%RSI) |
(3822) 0x4c3e8f CMP %RCX,%RAX |
(3822) 0x4c3e92 JNE 4c3e80 |
0x4c3e94 MOV (%R13),%ESI |
0x4c3e98 MOV $0x719590,%EDI |
0x4c3e9d CALL 40fef0 <__kmpc_barrier@plt> |
0x4c3ea2 TEST %R14,%R14 |
0x4c3ea5 JE 4c3eb2 |
0x4c3ea7 MOV -0x8(%R15,%R14,8),%RAX |
0x4c3eac MOV %RAX,-0x60(%RBP) |
0x4c3eb0 JMP 4c3eba |
0x4c3eb2 MOVQ $0,-0x60(%RBP) |
0x4c3eba MOV -0x38(%RBP),%RDX |
0x4c3ebe MOV %R12,%RSI |
0x4c3ec1 MOV -0x30(%RBP),%RDI |
0x4c3ec5 CMP %RDI,%R12 |
0x4c3ec8 JGE 4c4a0f |
0x4c3ece MOV 0xb0(%RBP),%RAX |
0x4c3ed5 MOV 0x98(%RBP),%RCX |
0x4c3edc LEA (%RAX,%R14,8),%R15 |
0x4c3ee0 LEA -0x1(%RCX),%RAX |
0x4c3ee4 SHR $0x1,%RAX |
0x4c3ee7 MOV %RAX,-0xb8(%RBP) |
0x4c3eee VPCMPEQD %YMM2,%YMM2,%YMM2 |
0x4c3ef2 MOV 0x20(%RBP),%R8 |
0x4c3ef6 MOVQ $0,-0x88(%RBP) |
0x4c3f01 JMP 4c3f2c |
0x4c3f03 NOPW %CS:(%RAX,%RAX,1) |
(3801) 0x4c3f10 MOV -0x38(%RBP),%RDX |
(3801) 0x4c3f14 MOV -0x50(%RBP),%RSI |
(3801) 0x4c3f18 MOV -0x30(%RBP),%RDI |
(3801) 0x4c3f1c MOV 0x20(%RBP),%R8 |
(3801) 0x4c3f20 INC %RSI |
(3801) 0x4c3f23 CMP %RDI,%RSI |
(3801) 0x4c3f26 JGE 4c4a0f |
(3801) 0x4c3f2c MOV -0xb0(%RBP),%RAX |
(3801) 0x4c3f33 MOV (%RAX,%RSI,8),%R10 |
(3801) 0x4c3f37 MOV (%RDX,%RSI,8),%R9 |
(3801) 0x4c3f3b MOV %R10,-0x48(%RBP) |
(3801) 0x4c3f3f SUB (%R8),%R10 |
(3801) 0x4c3f42 MOV %R9,-0x78(%RBP) |
(3801) 0x4c3f46 JL 4c3ff0 |
(3801) 0x4c3f4c MOV -0x48(%RBP),%RAX |
(3801) 0x4c3f50 CMP 0x8(%R8),%RAX |
(3801) 0x4c3f54 JGE 4c3ff0 |
(3801) 0x4c3f5a CMPQ $0,0x58(%RBP) |
(3801) 0x4c3f5f MOV %R10,-0x40(%RBP) |
(3801) 0x4c3f63 JE 4c41cf |
(3801) 0x4c3f69 MOV 0x38(%RBP),%RAX |
(3801) 0x4c3f6d MOV (%RAX,%R10,8),%R13 |
(3801) 0x4c3f71 MOV 0x40(%RBP),%RAX |
(3801) 0x4c3f75 MOV (%RAX,%R10,8),%RAX |
(3801) 0x4c3f79 MOV %RAX,-0x58(%RBP) |
(3801) 0x4c3f7d MOV 0x50(%RBP),%RAX |
(3801) 0x4c3f81 MOV (%RAX,%R10,8),%RCX |
(3801) 0x4c3f85 MOV 0x48(%RBP),%RAX |
(3801) 0x4c3f89 MOV (%RAX,%R10,8),%R12 |
(3801) 0x4c3f8d MOV %RCX,-0x80(%RBP) |
(3801) 0x4c3f91 MOV %RCX,%RAX |
(3801) 0x4c3f94 SUB %R12,%RAX |
(3801) 0x4c3f97 MOV %R9,%R14 |
(3801) 0x4c3f9a SUB %RAX,%R14 |
(3801) 0x4c3f9d MOV %RSI,-0x50(%RBP) |
(3801) 0x4c3fa1 JLE 4c4236 |
(3801) 0x4c3fa7 MOV $0x8,%ESI |
(3801) 0x4c3fac MOV %R14,%RDI |
(3801) 0x4c3faf VZEROUPPER |
(3801) 0x4c3fb2 CALL 4d5130 <hypre_CAlloc> |
(3801) 0x4c3fb7 MOV %RAX,-0x68(%RBP) |
(3801) 0x4c3fbb MOV $0x8,%ESI |
(3801) 0x4c3fc0 MOV %R14,%RDI |
(3801) 0x4c3fc3 CALL 4d5130 <hypre_CAlloc> |
(3801) 0x4c3fc8 MOV -0x40(%RBP),%R10 |
(3801) 0x4c3fcc MOV -0x78(%RBP),%R9 |
(3801) 0x4c3fd0 MOV %RAX,-0x88(%RBP) |
(3801) 0x4c3fd7 TEST %R9,%R9 |
(3801) 0x4c3fda JG 4c4247 |
(3801) 0x4c3fe0 MOV 0x48(%RBP),%RAX |
(3801) 0x4c3fe4 MOV %R12,(%RAX,%R10,8) |
(3801) 0x4c3fe8 JMP 4c450d |
0x4c3fed NOPL (%RAX) |
(3801) 0x4c3ff0 ADD %R9,-0x60(%RBP) |
(3801) 0x4c3ff4 MOV 0x18(%RBP),%RAX |
(3801) 0x4c3ff8 CMPQ $0,(%RAX) |
(3801) 0x4c3ffc JE 4c3f20 |
(3801) 0x4c4002 CMPQ $0,0x98(%RBP) |
(3801) 0x4c400a JLE 4c3f20 |
(3801) 0x4c4010 MOV %RSI,-0x50(%RBP) |
(3801) 0x4c4014 CMP %RBX,%R15 |
(3801) 0x4c4017 SETB %CL |
(3801) 0x4c401a LEA -0x1(%R9),%RAX |
(3801) 0x4c401e LEA -0x8(%RBX,%R9,8),%RDX |
(3801) 0x4c4023 MOV %RDX,-0x80(%RBP) |
(3801) 0x4c4027 CMP %R15,%RDX |
(3801) 0x4c402a SETB %DL |
(3801) 0x4c402d OR %CL,%DL |
(3801) 0x4c402f XOR %EDI,%EDI |
(3801) 0x4c4031 XOR %ESI,%ESI |
(3801) 0x4c4033 JMP 4c4054 |
0x4c4035 NOPW %CS:(%RAX,%RAX,1) |
(3802) 0x4c4040 LEA 0x1(%RSI),%RCX |
(3802) 0x4c4044 CMP -0xb8(%RBP),%RSI |
(3802) 0x4c404b MOV %RCX,%RSI |
(3802) 0x4c404e JE 4c3f10 |
(3802) 0x4c4054 MOV %RDI,%RCX |
(3802) 0x4c4057 MOV %RSI,%R9 |
(3802) 0x4c405a SAL $0x4,%R9 |
(3802) 0x4c405e MOV 0xa0(%RBP),%R10 |
(3802) 0x4c4065 MOV 0x8(%R10,%R9,1),%R8 |
(3802) 0x4c406a ADD %R8,%RDI |
(3802) 0x4c406d MOV -0x48(%RBP),%R11 |
(3802) 0x4c4071 CMP %R11,(%R10,%R9,1) |
(3802) 0x4c4075 JNE 4c4040 |
(3802) 0x4c4077 CMPQ $0,-0x78(%RBP) |
(3802) 0x4c407c JLE 4c4040 |
(3802) 0x4c407e MOV 0xa8(%RBP),%R9 |
(3802) 0x4c4085 LEA -0x8(%R9,%RDI,8),%R10 |
(3802) 0x4c408a CMP %R15,%R10 |
(3802) 0x4c408d SETB %R11B |
(3802) 0x4c4091 LEA (%R9,%RCX,8),%R9 |
(3802) 0x4c4095 CMP %R9,%R15 |
(3802) 0x4c4098 SETB %CL |
(3802) 0x4c409b OR %R11B,%CL |
(3802) 0x4c409e CMP %RBX,%R10 |
(3802) 0x4c40a1 SETB %R10B |
(3802) 0x4c40a5 CMP %R9,-0x80(%RBP) |
(3802) 0x4c40a9 SETB %R11B |
(3802) 0x4c40ad OR %R10B,%R11B |
(3802) 0x4c40b0 AND %CL,%R11B |
(3802) 0x4c40b3 TEST %DL,%R11B |
(3802) 0x4c40b6 JE 4c4180 |
(3802) 0x4c40bc MOV %R8,%RCX |
(3802) 0x4c40bf AND $-0x4,%RCX |
(3802) 0x4c40c3 LEA -0x1(%RCX),%R11 |
(3802) 0x4c40c7 XOR %R12D,%R12D |
(3802) 0x4c40ca JMP 4c40e1 |
0x4c40cc NOPL (%RAX) |
(3805) 0x4c40d0 LEA 0x1(%R12),%R10 |
(3805) 0x4c40d5 CMP %RAX,%R12 |
(3805) 0x4c40d8 MOV %R10,%R12 |
(3805) 0x4c40db JE 4c4040 |
(3805) 0x4c40e1 TEST %R8,%R8 |
(3805) 0x4c40e4 JLE 4c40d0 |
(3805) 0x4c40e6 MOV (%RBX,%R12,8),%R13 |
(3805) 0x4c40ea TEST %RCX,%RCX |
(3805) 0x4c40ed JE 4c4150 |
(3805) 0x4c40ef VPBROADCASTQ %R13,%YMM1 |
(3805) 0x4c40f5 VPXOR %XMM0,%XMM0,%XMM0 |
(3805) 0x4c40f9 XOR %R10D,%R10D |
(3805) 0x4c40fc NOPL (%RAX) |
(3807) 0x4c4100 VPCMPEQQ (%R9,%R10,8),%YMM1,%K1 |
(3807) 0x4c4107 VMOVDQU64 %YMM2,(%R9,%R10,8){%K1} |
(3807) 0x4c410e VPSUBQ %YMM2,%YMM0,%YMM0{%K1} |
(3807) 0x4c4114 ADD $0x4,%R10 |
(3807) 0x4c4118 CMP %R11,%R10 |
(3807) 0x4c411b JLE 4c4100 |
(3805) 0x4c411d VEXTRACTI128 $0x1,%YMM0,%XMM1 |
(3805) 0x4c4123 VPADDQ %XMM1,%XMM0,%XMM0 |
(3805) 0x4c4127 VPSHUFD $-0x12,%XMM0,%XMM1 |
(3805) 0x4c412c VPADDQ %XMM1,%XMM0,%XMM0 |
(3805) 0x4c4130 VMOVQ %XMM0,%R14 |
(3805) 0x4c4135 MOV %RCX,%R10 |
(3805) 0x4c4138 CMP %RCX,%R8 |
(3805) 0x4c413b JNE 4c4168 |
(3805) 0x4c413d TEST %R14,%R14 |
(3805) 0x4c4140 JE 4c40d0 |
(3805) 0x4c4142 ADD %R14,(%R15) |
(3805) 0x4c4145 JMP 4c40d0 |
0x4c4147 NOPW (%RAX,%RAX,1) |
(3805) 0x4c4150 XOR %R14D,%R14D |
(3805) 0x4c4153 XOR %R10D,%R10D |
(3805) 0x4c4156 JMP 4c4168 |
0x4c4158 NOPL (%RAX,%RAX,1) |
(3806) 0x4c4160 INC %R10 |
(3806) 0x4c4163 CMP %R10,%R8 |
(3806) 0x4c4166 JE 4c413d |
(3806) 0x4c4168 CMP %R13,(%R9,%R10,8) |
(3806) 0x4c416c JNE 4c4160 |
(3806) 0x4c416e MOVQ $-0x1,(%R9,%R10,8) |
(3806) 0x4c4176 INC %R14 |
(3806) 0x4c4179 JMP 4c4160 |
0x4c417b NOPL (%RAX,%RAX,1) |
(3802) 0x4c4180 XOR %ECX,%ECX |
(3802) 0x4c4182 JMP 4c41a0 |
0x4c4184 NOPW %CS:(%RAX,%RAX,1) |
(3803) 0x4c4190 LEA 0x1(%RCX),%R10 |
(3803) 0x4c4194 CMP %RAX,%RCX |
(3803) 0x4c4197 MOV %R10,%RCX |
(3803) 0x4c419a JE 4c4040 |
(3803) 0x4c41a0 TEST %R8,%R8 |
(3803) 0x4c41a3 JLE 4c4190 |
(3803) 0x4c41a5 XOR %R10D,%R10D |
(3803) 0x4c41a8 JMP 4c41b8 |
0x4c41aa NOPW (%RAX,%RAX,1) |
(3804) 0x4c41b0 INC %R10 |
(3804) 0x4c41b3 CMP %R10,%R8 |
(3804) 0x4c41b6 JE 4c4190 |
(3804) 0x4c41b8 MOV (%R9,%R10,8),%R11 |
(3804) 0x4c41bc CMP (%RBX,%RCX,8),%R11 |
(3804) 0x4c41c0 JNE 4c41b0 |
(3804) 0x4c41c2 MOVQ $-0x1,(%R9,%R10,8) |
(3804) 0x4c41ca INCQ (%R15) |
(3804) 0x4c41cd JMP 4c41b0 |
(3801) 0x4c41cf MOV 0x18(%RBP),%RAX |
(3801) 0x4c41d3 MOV (%RAX),%RAX |
(3801) 0x4c41d6 MOV 0x38(%RAX),%RCX |
(3801) 0x4c41da MOV 0x40(%RAX),%RAX |
(3801) 0x4c41de MOV (%RAX,%R10,8),%RAX |
(3801) 0x4c41e2 MOV (%RCX,%R10,8),%RCX |
(3801) 0x4c41e6 TEST %R9,%R9 |
(3801) 0x4c41e9 JLE 4c451f |
(3801) 0x4c41ef MOV %RSI,-0x50(%RBP) |
(3801) 0x4c41f3 MOV %RAX,%RSI |
(3801) 0x4c41f6 MOV 0x60(%RBP),%RAX |
(3801) 0x4c41fa MOV 0x8(%RAX,%R10,8),%RAX |
(3801) 0x4c41ff MOV %RAX,-0x98(%RBP) |
(3801) 0x4c4206 MOV 0x78(%RBP),%RAX |
(3801) 0x4c420a MOV 0x8(%RAX,%R10,8),%RAX |
(3801) 0x4c420f MOV %RAX,-0x68(%RBP) |
(3801) 0x4c4213 MOV -0x60(%RBP),%RDX |
(3801) 0x4c4217 LEA (%RDX,%R9,1),%RAX |
(3801) 0x4c421b MOV %RAX,-0x90(%RBP) |
(3801) 0x4c4222 XOR %R8D,%R8D |
(3801) 0x4c4225 MOV %RSI,-0x80(%RBP) |
(3801) 0x4c4229 MOV %RSI,-0x58(%RBP) |
(3801) 0x4c422d MOV %RCX,-0x70(%RBP) |
(3801) 0x4c4231 JMP 4c4584 |
(3801) 0x4c4236 MOVQ $0,-0x68(%RBP) |
(3801) 0x4c423e TEST %R9,%R9 |
(3801) 0x4c4241 JLE 4c3fe0 |
(3801) 0x4c4247 LEA -0x1(%R9),%RAX |
(3801) 0x4c424b MOV %R13D,%ECX |
(3801) 0x4c424e AND $0x7f,%ECX |
(3801) 0x4c4251 MOV $0x80,%ESI |
(3801) 0x4c4256 MOV %RCX,-0x70(%RBP) |
(3801) 0x4c425a SUB %ECX,%ESI |
(3801) 0x4c425c SHR $0x3,%ESI |
(3801) 0x4c425f CMP %RSI,%R12 |
(3801) 0x4c4262 MOV %RSI,-0x98(%RBP) |
(3801) 0x4c4269 CMOVB %R12,%RSI |
(3801) 0x4c426d MOV %R12,%RCX |
(3801) 0x4c4270 SUB %RSI,%RCX |
(3801) 0x4c4273 MOV %RCX,-0xa8(%RBP) |
(3801) 0x4c427a AND $-0x10,%RCX |
(3801) 0x4c427e LEA -0x1(%RCX),%R9 |
(3801) 0x4c4282 MOV %ESI,%EDX |
(3801) 0x4c4284 LEA (%R13,%RDX,8),%R10 |
(3801) 0x4c4289 MOV %RCX,-0x90(%RBP) |
(3801) 0x4c4290 ADD %RSI,%RCX |
(3801) 0x4c4293 MOV %RCX,-0xa0(%RBP) |
(3801) 0x4c429a MOVQ $0,-0x48(%RBP) |
(3801) 0x4c42a2 MOV %R12,%R14 |
(3801) 0x4c42a5 XOR %R11D,%R11D |
(3801) 0x4c42a8 JMP 4c42ec |
0x4c42aa NOPW (%RAX,%RAX,1) |
(3818) 0x4c42b0 MOV -0x68(%RBP),%R8 |
(3818) 0x4c42b4 MOV -0x48(%RBP),%RCX |
(3818) 0x4c42b8 MOV %RDX,(%R8,%RCX,8) |
(3818) 0x4c42bc MOV -0x88(%RBP),%RDX |
(3818) 0x4c42c3 LEA (%RDX,%RCX,8),%RDX |
(3818) 0x4c42c7 INC %RCX |
(3818) 0x4c42ca MOV %RCX,-0x48(%RBP) |
(3818) 0x4c42ce MOV 0x10(%RBP),%R8 |
(3818) 0x4c42d2 VMOVQ (%R8,%RDI,8),%XMM0 |
(3818) 0x4c42d8 VMOVQ %XMM0,(%RDX) |
(3818) 0x4c42dc LEA 0x1(%R11),%RDX |
(3818) 0x4c42e0 CMP %RAX,%R11 |
(3818) 0x4c42e3 MOV %RDX,%R11 |
(3818) 0x4c42e6 JE 4c43fe |
(3818) 0x4c42ec TEST %R12,%R12 |
(3818) 0x4c42ef JLE 4c43a0 |
(3818) 0x4c42f5 MOV -0x60(%RBP),%RDX |
(3818) 0x4c42f9 LEA (%RDX,%R11,1),%RDI |
(3818) 0x4c42fd MOV (%RBX,%RDI,8),%RDX |
(3818) 0x4c4301 CMPL $0x78,-0x70(%RBP) |
(3818) 0x4c4305 JA 4c4323 |
(3818) 0x4c4307 XOR %R8D,%R8D |
(3818) 0x4c430a NOPW (%RAX,%RAX,1) |
(3821) 0x4c4310 CMP %RDX,(%R13,%R8,8) |
(3821) 0x4c4315 JE 4c43f1 |
(3821) 0x4c431b INC %R8 |
(3821) 0x4c431e CMP %R8,%RSI |
(3821) 0x4c4321 JNE 4c4310 |
(3818) 0x4c4323 CMP %R12,-0x98(%RBP) |
(3818) 0x4c432a JAE 4c43a0 |
(3818) 0x4c432c CMPQ $0,-0x90(%RBP) |
(3818) 0x4c4334 JE 4c4376 |
(3818) 0x4c4336 VPBROADCASTQ %RDX,%YMM0 |
(3818) 0x4c433c XOR %R8D,%R8D |
(3818) 0x4c433f NOP |
(3820) 0x4c4340 VPCMPEQQ 0x20(%R10,%R8,8),%YMM0,%K0 |
(3820) 0x4c4348 VPCMPEQQ (%R10,%R8,8),%YMM0,%K1 |
(3820) 0x4c434f VPCMPEQQ 0x60(%R10,%R8,8),%YMM0,%K2 |
(3820) 0x4c4357 VPCMPEQQ 0x40(%R10,%R8,8),%YMM0,%K3 |
(3820) 0x4c435f KORB %K0,%K1,%K4 |
(3820) 0x4c4363 KORB %K2,%K3,%K5 |
(3820) 0x4c4367 KORTESTB %K5,%K4 |
(3820) 0x4c436b JNE 4c43cb |
(3820) 0x4c436d ADD $0x10,%R8 |
(3820) 0x4c4371 CMP %R9,%R8 |
(3820) 0x4c4374 JBE 4c4340 |
(3818) 0x4c4376 MOV -0x90(%RBP),%RCX |
(3818) 0x4c437d CMP -0xa8(%RBP),%RCX |
(3818) 0x4c4384 JE 4c43a0 |
(3818) 0x4c4386 MOV -0xa0(%RBP),%R8 |
(3818) 0x4c438d NOPL (%RAX) |
(3819) 0x4c4390 CMP %RDX,(%R13,%R8,8) |
(3819) 0x4c4395 JE 4c43f1 |
(3819) 0x4c4397 INC %R8 |
(3819) 0x4c439a CMP %R8,%R12 |
(3819) 0x4c439d JNE 4c4390 |
(3818) 0x4c439f NOP |
(3818) 0x4c43a0 MOV -0x60(%RBP),%RDX |
(3818) 0x4c43a4 LEA (%RDX,%R11,1),%RDI |
(3818) 0x4c43a8 MOV (%RBX,%RDI,8),%RDX |
(3818) 0x4c43ac CMP -0x80(%RBP),%R14 |
(3818) 0x4c43b0 JGE 4c42b0 |
(3818) 0x4c43b6 MOV %RDX,(%R13,%R14,8) |
(3818) 0x4c43bb MOV -0x58(%RBP),%RCX |
(3818) 0x4c43bf LEA (%RCX,%R14,8),%RDX |
(3818) 0x4c43c3 INC %R14 |
(3818) 0x4c43c6 JMP 4c42ce |
(3818) 0x4c43cb KSHIFTLB $0x4,%K0,%K0 |
(3818) 0x4c43d1 KORB %K0,%K1,%K0 |
(3818) 0x4c43d5 KSHIFTLB $0x4,%K2,%K1 |
(3818) 0x4c43db KORB %K1,%K3,%K1 |
(3818) 0x4c43df KUNPCKBW %K0,%K1,%K0 |
(3818) 0x4c43e3 KMOVD %K0,%EDX |
(3818) 0x4c43e7 TZCNT %EDX,%EDX |
(3818) 0x4c43eb ADD %RSI,%R8 |
(3818) 0x4c43ee ADD %RDX,%R8 |
(3818) 0x4c43f1 MOV -0x58(%RBP),%RCX |
(3818) 0x4c43f5 LEA (%RCX,%R8,8),%RDX |
(3818) 0x4c43f9 JMP 4c42ce |
(3801) 0x4c43fe MOV -0x78(%RBP),%RAX |
(3801) 0x4c4402 ADD %RAX,-0x60(%RBP) |
(3801) 0x4c4406 MOV -0x48(%RBP),%RDX |
(3801) 0x4c440a LEA (%R14,%RDX,1),%R12 |
(3801) 0x4c440e MOV 0x48(%RBP),%RAX |
(3801) 0x4c4412 MOV -0x40(%RBP),%RCX |
(3801) 0x4c4416 MOV %R12,(%RAX,%RCX,8) |
(3801) 0x4c441a TEST %RDX,%RDX |
(3801) 0x4c441d JE 4c450d |
(3801) 0x4c4423 MOV 0x38(%RBP),%RAX |
(3801) 0x4c4427 MOV (%RAX,%RCX,8),%RDI |
(3801) 0x4c442b LEA (,%R12,8),%R13 |
(3801) 0x4c4433 MOV %R13,%RSI |
(3801) 0x4c4436 VZEROUPPER |
(3801) 0x4c4439 CALL 4d5190 <hypre_ReAlloc> |
(3801) 0x4c443e MOV -0x40(%RBP),%RCX |
(3801) 0x4c4442 MOV 0x38(%RBP),%RDX |
(3801) 0x4c4446 MOV %RAX,(%RDX,%RCX,8) |
(3801) 0x4c444a MOV 0x40(%RBP),%RCX |
(3801) 0x4c444e MOV -0x40(%RBP),%RAX |
(3801) 0x4c4452 MOV (%RCX,%RAX,8),%RDI |
(3801) 0x4c4456 MOV %R13,%RSI |
(3801) 0x4c4459 CALL 4d5190 <hypre_ReAlloc> |
(3801) 0x4c445e MOV -0x48(%RBP),%RDX |
(3801) 0x4c4462 MOV -0x40(%RBP),%RSI |
(3801) 0x4c4466 MOV 0x40(%RBP),%RCX |
(3801) 0x4c446a MOV %RAX,(%RCX,%RSI,8) |
(3801) 0x4c446e MOV 0x50(%RBP),%RCX |
(3801) 0x4c4472 MOV %R12,(%RCX,%RSI,8) |
(3801) 0x4c4476 TEST %RDX,%RDX |
(3801) 0x4c4479 JLE 4c450d |
(3801) 0x4c447f MOV 0x38(%RBP),%RCX |
(3801) 0x4c4483 MOV (%RCX,%RSI,8),%RCX |
(3801) 0x4c4487 MOV -0x68(%RBP),%R13 |
(3801) 0x4c448b LEA -0x8(%R13,%RDX,8),%RSI |
(3801) 0x4c4490 LEA (%RCX,%R14,8),%RDI |
(3801) 0x4c4494 CMP %RDI,%RSI |
(3801) 0x4c4497 SETAE %R9B |
(3801) 0x4c449b LEA -0x8(%RCX,%R12,8),%RCX |
(3801) 0x4c44a0 CMP %R13,%RCX |
(3801) 0x4c44a3 SETAE %SIL |
(3801) 0x4c44a7 MOV -0x88(%RBP),%R8 |
(3801) 0x4c44ae LEA -0x8(%R8,%RDX,8),%RCX |
(3801) 0x4c44b3 LEA (%RAX,%R14,8),%R10 |
(3801) 0x4c44b7 CMP %R10,%RCX |
(3801) 0x4c44ba SETB %CL |
(3801) 0x4c44bd LEA -0x8(%RAX,%R12,8),%RAX |
(3801) 0x4c44c2 CMP %R8,%RAX |
(3801) 0x4c44c5 SETB %AL |
(3801) 0x4c44c8 TEST %SIL,%R9B |
(3801) 0x4c44cb JNE 4c4885 |
(3801) 0x4c44d1 OR %AL,%CL |
(3801) 0x4c44d3 JE 4c4885 |
(3801) 0x4c44d9 CMP $0xd,%RDX |
(3801) 0x4c44dd JB 4c4974 |
(3801) 0x4c44e3 SAL $0x3,%RDX |
(3801) 0x4c44e7 MOV %RDX,-0x48(%RBP) |
(3801) 0x4c44eb MOV %R13,%RSI |
(3801) 0x4c44ee MOV %R8,%R14 |
(3801) 0x4c44f1 MOV %R10,%R12 |
(3801) 0x4c44f4 CALL 4de8a0 <__intel_avx_rep_memcpy> |
(3801) 0x4c44f9 MOV %R12,%RDI |
(3801) 0x4c44fc MOV %R14,%RSI |
(3801) 0x4c44ff MOV -0x48(%RBP),%RDX |
(3801) 0x4c4503 CALL 4de8a0 <__intel_avx_rep_memcpy> |
(3801) 0x4c4508 JMP 4c49b7 |
(3801) 0x4c450d MOV -0x68(%RBP),%R13 |
(3801) 0x4c4511 TEST %R13,%R13 |
(3801) 0x4c4514 JNE 4c49b7 |
(3801) 0x4c451a JMP 4c49d9 |
(3801) 0x4c451f MOV %RCX,%R11 |
(3801) 0x4c4522 MOV %RAX,%R9 |
(3801) 0x4c4525 JMP 4c4917 |
(3808) 0x4c452a KSHIFTLB $0x4,%K0,%K0 |
(3808) 0x4c4530 KORB %K0,%K1,%K0 |
(3808) 0x4c4534 KSHIFTLB $0x4,%K2,%K1 |
(3808) 0x4c453a KORB %K1,%K3,%K1 |
(3808) 0x4c453e KUNPCKBW %K0,%K1,%K0 |
(3808) 0x4c4542 KMOVD %K0,%EDX |
(3808) 0x4c4546 TZCNT %EDX,%EDX |
(3808) 0x4c454a ADD %RSI,%R12 |
(3808) 0x4c454d ADD %RDX,%R12 |
(3808) 0x4c4550 MOV -0x30(%RBP),%RDI |
(3808) 0x4c4554 MOV %RAX,%RDX |
(3808) 0x4c4557 MOV 0x10(%RBP),%RAX |
(3808) 0x4c455b VMOVQ (%RAX,%RDX,8),%XMM0 |
(3808) 0x4c4560 MOV 0x88(%RBP),%RAX |
(3808) 0x4c4567 VMOVQ %XMM0,(%RAX,%R12,8) |
(3808) 0x4c456d MOV -0x78(%RBP),%RAX |
(3808) 0x4c4571 MOV -0x40(%RBP),%R10 |
(3808) 0x4c4575 INC %RDX |
(3808) 0x4c4578 INC %R8 |
(3808) 0x4c457b CMP %RAX,%R8 |
(3808) 0x4c457e JE 4c4869 |
(3808) 0x4c4584 MOV (%RBX,%RDX,8),%R9 |
(3808) 0x4c4588 CMP 0x28(%RBP),%R9 |
(3808) 0x4c458c JL 4c46e0 |
(3808) 0x4c4592 CMP 0x30(%RBP),%R9 |
(3808) 0x4c4596 JG 4c46e0 |
(3808) 0x4c459c MOV 0x60(%RBP),%RAX |
(3808) 0x4c45a0 MOV (%RAX,%R10,8),%R10 |
(3808) 0x4c45a4 MOV %RCX,%R11 |
(3808) 0x4c45a7 SUB %R10,%R11 |
(3808) 0x4c45aa JLE 4c46a0 |
(3808) 0x4c45b0 MOV %RDX,%RAX |
(3808) 0x4c45b3 MOV 0x68(%RBP),%RDX |
(3808) 0x4c45b7 LEA (%RDX,%R10,8),%EDX |
(3808) 0x4c45bb AND $0x7f,%EDX |
(3808) 0x4c45be MOV $0x80,%ESI |
(3808) 0x4c45c3 SUB %EDX,%ESI |
(3808) 0x4c45c5 SHR $0x3,%ESI |
(3808) 0x4c45c8 CMP %RSI,%R11 |
(3808) 0x4c45cb MOV %RSI,%R13 |
(3808) 0x4c45ce CMOVB %R11,%R13 |
(3808) 0x4c45d2 TEST %R13,%R13 |
(3808) 0x4c45d5 JE 4c45f6 |
(3808) 0x4c45d7 MOV %R10,%R12 |
(3808) 0x4c45da MOV %R13,%RDX |
(3808) 0x4c45dd NOPL (%RAX) |
(3814) 0x4c45e0 MOV 0x68(%RBP),%R14 |
(3814) 0x4c45e4 CMP %R9,(%R14,%R12,8) |
(3814) 0x4c45e8 JE 4c4854 |
(3814) 0x4c45ee INC %R12 |
(3814) 0x4c45f1 DEC %RDX |
(3814) 0x4c45f4 JNE 4c45e0 |
(3808) 0x4c45f6 CMP %R11,%RSI |
(3808) 0x4c45f9 MOV %RAX,%RDX |
(3808) 0x4c45fc JAE 4c46a0 |
(3808) 0x4c4602 SUB %R13,%R11 |
(3808) 0x4c4605 MOV %R11,%R14 |
(3808) 0x4c4608 AND $-0x10,%R14 |
(3808) 0x4c460c JE 4c466a |
(3808) 0x4c460e LEA -0x1(%R14),%RDX |
(3808) 0x4c4612 VPBROADCASTQ %R9,%YMM0 |
(3808) 0x4c4618 LEA (%R10,%R13,1),%R12 |
(3808) 0x4c461c MOV 0x68(%RBP),%RSI |
(3808) 0x4c4620 LEA (%RSI,%R12,8),%RDI |
(3808) 0x4c4624 XOR %ESI,%ESI |
(3808) 0x4c4626 NOPW %CS:(%RAX,%RAX,1) |
(3813) 0x4c4630 VPCMPEQQ 0x20(%RDI,%RSI,8),%YMM0,%K0 |
(3813) 0x4c4638 VPCMPEQQ (%RDI,%RSI,8),%YMM0,%K1 |
(3813) 0x4c463f VPCMPEQQ 0x60(%RDI,%RSI,8),%YMM0,%K2 |
(3813) 0x4c4647 VPCMPEQQ 0x40(%RDI,%RSI,8),%YMM0,%K3 |
(3813) 0x4c464f KORB %K0,%K1,%K4 |
(3813) 0x4c4653 KORB %K2,%K3,%K5 |
(3813) 0x4c4657 KORTESTB %K5,%K4 |
(3813) 0x4c465b JNE 4c482a |
(3813) 0x4c4661 ADD $0x10,%RSI |
(3813) 0x4c4665 CMP %RDX,%RSI |
(3813) 0x4c4668 JBE 4c4630 |
(3808) 0x4c466a CMP %R11,%R14 |
(3808) 0x4c466d MOV -0x30(%RBP),%RDI |
(3808) 0x4c4671 MOV %RAX,%RDX |
(3808) 0x4c4674 JE 4c46a0 |
(3808) 0x4c4676 ADD %R13,%R10 |
(3808) 0x4c4679 ADD %R14,%R10 |
(3808) 0x4c467c MOV %R10,%R12 |
(3808) 0x4c467f NOP |
(3812) 0x4c4680 MOV 0x68(%RBP),%RAX |
(3812) 0x4c4684 CMP %R9,(%RAX,%R12,8) |
(3812) 0x4c4688 JE 4c4857 |
(3812) 0x4c468e INC %R12 |
(3812) 0x4c4691 CMP %R12,%RCX |
(3812) 0x4c4694 JNE 4c4680 |
(3808) 0x4c4696 NOPW %CS:(%RAX,%RAX,1) |
(3808) 0x4c46a0 MOV -0x70(%RBP),%RSI |
(3808) 0x4c46a4 CMP -0x98(%RBP),%RSI |
(3808) 0x4c46ab JGE 4c4933 |
(3808) 0x4c46b1 MOV 0x68(%RBP),%RAX |
(3808) 0x4c46b5 MOV %R9,(%RAX,%RSI,8) |
(3808) 0x4c46b9 MOV 0x10(%RBP),%RAX |
(3808) 0x4c46bd VMOVQ (%RAX,%RDX,8),%XMM0 |
(3808) 0x4c46c2 MOV 0x70(%RBP),%RAX |
(3808) 0x4c46c6 VMOVQ %XMM0,(%RAX,%RSI,8) |
(3808) 0x4c46cb INC %RSI |
(3808) 0x4c46ce MOV %RSI,-0x70(%RBP) |
(3808) 0x4c46d2 JMP 4c456d |
0x4c46d7 NOPW (%RAX,%RAX,1) |
(3808) 0x4c46e0 MOV 0x78(%RBP),%RAX |
(3808) 0x4c46e4 MOV (%RAX,%R10,8),%R10 |
(3808) 0x4c46e8 MOV -0x80(%RBP),%R11 |
(3808) 0x4c46ec SUB %R10,%R11 |
(3808) 0x4c46ef JLE 4c47f0 |
(3808) 0x4c46f5 MOV %RDX,%RAX |
(3808) 0x4c46f8 MOV 0x80(%RBP),%RDX |
(3808) 0x4c46ff LEA (%RDX,%R10,8),%EDX |
(3808) 0x4c4703 AND $0x7f,%EDX |
(3808) 0x4c4706 MOV $0x80,%ESI |
(3808) 0x4c470b SUB %EDX,%ESI |
(3808) 0x4c470d SHR $0x3,%ESI |
(3808) 0x4c4710 CMP %RSI,%R11 |
(3808) 0x4c4713 MOV %RSI,%R13 |
(3808) 0x4c4716 CMOVB %R11,%R13 |
(3808) 0x4c471a TEST %R13,%R13 |
(3808) 0x4c471d JE 4c4749 |
(3808) 0x4c471f MOV %R10,%R12 |
(3808) 0x4c4722 MOV %R13,%RDX |
(3808) 0x4c4725 NOPW %CS:(%RAX,%RAX,1) |
(3811) 0x4c4730 MOV 0x80(%RBP),%R14 |
(3811) 0x4c4737 CMP %R9,(%R14,%R12,8) |
(3811) 0x4c473b JE 4c4554 |
(3811) 0x4c4741 INC %R12 |
(3811) 0x4c4744 DEC %RDX |
(3811) 0x4c4747 JNE 4c4730 |
(3808) 0x4c4749 CMP %R11,%RSI |
(3808) 0x4c474c MOV %RAX,%RDX |
(3808) 0x4c474f JAE 4c47f0 |
(3808) 0x4c4755 SUB %R13,%R11 |
(3808) 0x4c4758 MOV %R11,%R14 |
(3808) 0x4c475b AND $-0x10,%R14 |
(3808) 0x4c475f JE 4c47ba |
(3808) 0x4c4761 LEA -0x1(%R14),%RDX |
(3808) 0x4c4765 VPBROADCASTQ %R9,%YMM0 |
(3808) 0x4c476b LEA (%R10,%R13,1),%R12 |
(3808) 0x4c476f MOV 0x80(%RBP),%RSI |
(3808) 0x4c4776 LEA (%RSI,%R12,8),%RDI |
(3808) 0x4c477a XOR %ESI,%ESI |
(3808) 0x4c477c NOPL (%RAX) |
(3810) 0x4c4780 VPCMPEQQ 0x20(%RDI,%RSI,8),%YMM0,%K0 |
(3810) 0x4c4788 VPCMPEQQ (%RDI,%RSI,8),%YMM0,%K1 |
(3810) 0x4c478f VPCMPEQQ 0x60(%RDI,%RSI,8),%YMM0,%K2 |
(3810) 0x4c4797 VPCMPEQQ 0x40(%RDI,%RSI,8),%YMM0,%K3 |
(3810) 0x4c479f KORB %K0,%K1,%K4 |
(3810) 0x4c47a3 KORB %K2,%K3,%K5 |
(3810) 0x4c47a7 KORTESTB %K5,%K4 |
(3810) 0x4c47ab JNE 4c452a |
(3810) 0x4c47b1 ADD $0x10,%RSI |
(3810) 0x4c47b5 CMP %RDX,%RSI |
(3810) 0x4c47b8 JBE 4c4780 |
(3808) 0x4c47ba CMP %R11,%R14 |
(3808) 0x4c47bd MOV -0x30(%RBP),%RDI |
(3808) 0x4c47c1 MOV %RAX,%RDX |
(3808) 0x4c47c4 JE 4c47f0 |
(3808) 0x4c47c6 ADD %R13,%R10 |
(3808) 0x4c47c9 ADD %R14,%R10 |
(3808) 0x4c47cc MOV %R10,%R12 |
(3808) 0x4c47cf NOP |
(3809) 0x4c47d0 MOV 0x80(%RBP),%RAX |
(3809) 0x4c47d7 CMP %R9,(%RAX,%R12,8) |
(3809) 0x4c47db JE 4c4557 |
(3809) 0x4c47e1 INC %R12 |
(3809) 0x4c47e4 CMP %R12,-0x80(%RBP) |
(3809) 0x4c47e8 JNE 4c47d0 |
(3808) 0x4c47ea NOPW (%RAX,%RAX,1) |
(3808) 0x4c47f0 MOV -0x58(%RBP),%RSI |
(3808) 0x4c47f4 CMP -0x68(%RBP),%RSI |
(3808) 0x4c47f8 JGE 4c48b2 |
(3808) 0x4c47fe MOV 0x80(%RBP),%RAX |
(3808) 0x4c4805 MOV %R9,(%RAX,%RSI,8) |
(3808) 0x4c4809 MOV 0x10(%RBP),%RAX |
(3808) 0x4c480d VMOVQ (%RAX,%RDX,8),%XMM0 |
(3808) 0x4c4812 MOV 0x88(%RBP),%RAX |
(3808) 0x4c4819 VMOVQ %XMM0,(%RAX,%RSI,8) |
(3808) 0x4c481e INC %RSI |
(3808) 0x4c4821 MOV %RSI,-0x58(%RBP) |
(3808) 0x4c4825 JMP 4c456d |
(3808) 0x4c482a KSHIFTLB $0x4,%K0,%K0 |
(3808) 0x4c4830 KORB %K0,%K1,%K0 |
(3808) 0x4c4834 KSHIFTLB $0x4,%K2,%K1 |
(3808) 0x4c483a KORB %K1,%K3,%K1 |
(3808) 0x4c483e KUNPCKBW %K0,%K1,%K0 |
(3808) 0x4c4842 KMOVD %K0,%EDX |
(3808) 0x4c4846 TZCNT %EDX,%EDX |
(3808) 0x4c484a ADD %RSI,%R12 |
(3808) 0x4c484d ADD %RDX,%R12 |
(3808) 0x4c4850 MOV -0x30(%RBP),%RDI |
(3808) 0x4c4854 MOV %RAX,%RDX |
(3808) 0x4c4857 MOV 0x10(%RBP),%RAX |
(3808) 0x4c485b VMOVQ (%RAX,%RDX,8),%XMM0 |
(3808) 0x4c4860 MOV 0x70(%RBP),%RAX |
(3808) 0x4c4864 JMP 4c4567 |
(3801) 0x4c4869 MOV -0x90(%RBP),%RAX |
(3801) 0x4c4870 MOV %RAX,-0x60(%RBP) |
(3801) 0x4c4874 MOV -0x38(%RBP),%RDX |
(3801) 0x4c4878 MOV -0x50(%RBP),%RSI |
(3801) 0x4c487c MOV 0x20(%RBP),%R8 |
(3801) 0x4c4880 JMP 4c490f |
(3801) 0x4c4885 XOR %EAX,%EAX |
(3801) 0x4c4887 NOPW (%RAX,%RAX,1) |
(3815) 0x4c4890 MOV (%R13,%RAX,8),%RCX |
(3815) 0x4c4895 MOV %RCX,(%RDI,%RAX,8) |
(3815) 0x4c4899 VMOVQ (%R8,%RAX,8),%XMM0 |
(3815) 0x4c489f VMOVQ %XMM0,(%R10,%RAX,8) |
(3815) 0x4c48a5 INC %RAX |
(3815) 0x4c48a8 CMP %RAX,%RDX |
(3815) 0x4c48ab JNE 4c4890 |
(3801) 0x4c48ad JMP 4c49b7 |
(3801) 0x4c48b2 MOV %RDX,-0x60(%RBP) |
(3801) 0x4c48b6 MOV -0x48(%RBP),%R14 |
(3801) 0x4c48ba MOV $0x4f4963,%EDI |
(3801) 0x4c48bf MOV $0xd4e,%ESI |
(3801) 0x4c48c4 MOV $0x1,%EDX |
(3801) 0x4c48c9 XOR %ECX,%ECX |
(3801) 0x4c48cb VZEROUPPER |
(3801) 0x4c48ce CALL 4d7ce0 <hypre_error_handler> |
(3801) 0x4c48d3 MOV 0xd0(%RBP),%RAX |
(3801) 0x4c48da LOCK INCQ (%RAX) |
(3801) 0x4c48de MOV $0x4f4abf,%EDI |
(3801) 0x4c48e3 CMPQ $0,0xc0(%RBP) |
(3801) 0x4c48eb JE 4c48f7 |
(3801) 0x4c48ed MOV %R14,%RSI |
(3801) 0x4c48f0 XOR %EAX,%EAX |
(3801) 0x4c48f2 CALL 4d52e0 <hypre_printf> |
(3801) 0x4c48f7 MOV -0x38(%RBP),%RDX |
(3801) 0x4c48fb MOV -0x50(%RBP),%RSI |
(3801) 0x4c48ff MOV -0x30(%RBP),%RDI |
(3801) 0x4c4903 VPCMPEQD %YMM2,%YMM2,%YMM2 |
(3801) 0x4c4907 MOV 0x20(%RBP),%R8 |
(3801) 0x4c490b MOV -0x40(%RBP),%R10 |
(3801) 0x4c490f MOV -0x58(%RBP),%R9 |
(3801) 0x4c4913 MOV -0x70(%RBP),%R11 |
(3801) 0x4c4917 MOV 0x18(%RBP),%RAX |
(3801) 0x4c491b MOV (%RAX),%RAX |
(3801) 0x4c491e MOV 0x38(%RAX),%RCX |
(3801) 0x4c4922 MOV %R11,(%RCX,%R10,8) |
(3801) 0x4c4926 MOV 0x40(%RAX),%RAX |
(3801) 0x4c492a MOV %R9,(%RAX,%R10,8) |
(3801) 0x4c492e JMP 4c3f20 |
(3801) 0x4c4933 MOV %RDX,-0x60(%RBP) |
(3801) 0x4c4937 MOV -0x48(%RBP),%R14 |
(3801) 0x4c493b MOV $0x4f4963,%EDI |
(3801) 0x4c4940 MOV $0xd70,%ESI |
(3801) 0x4c4945 MOV $0x1,%EDX |
(3801) 0x4c494a XOR %ECX,%ECX |
(3801) 0x4c494c VZEROUPPER |
(3801) 0x4c494f CALL 4d7ce0 <hypre_error_handler> |
(3801) 0x4c4954 MOV 0xd0(%RBP),%RAX |
(3801) 0x4c495b LOCK INCQ (%RAX) |
(3801) 0x4c495f MOV $0x4f4ae5,%EDI |
(3801) 0x4c4964 CMPQ $0,0xc0(%RBP) |
(3801) 0x4c496c JNE 4c48ed |
(3801) 0x4c4972 JMP 4c48f7 |
(3801) 0x4c4974 MOV %RDX,%RAX |
(3801) 0x4c4977 AND $-0x4,%RAX |
(3801) 0x4c497b JE 4c49b3 |
(3801) 0x4c497d LEA -0x1(%RAX),%RCX |
(3801) 0x4c4981 XOR %ESI,%ESI |
(3801) 0x4c4983 NOPW %CS:(%RAX,%RAX,1) |
(3817) 0x4c4990 VMOVUPS (%R13,%RSI,8),%YMM0 |
(3817) 0x4c4997 VMOVUPS %YMM0,(%RDI,%RSI,8) |
(3817) 0x4c499c VMOVDQU (%R8,%RSI,8),%YMM0 |
(3817) 0x4c49a2 VMOVDQU %YMM0,(%R10,%RSI,8) |
(3817) 0x4c49a8 ADD $0x4,%RSI |
(3817) 0x4c49ac CMP %RCX,%RSI |
(3817) 0x4c49af JLE 4c4990 |
(3801) 0x4c49b1 JMP 4c49f0 |
(3801) 0x4c49b3 XOR %EAX,%EAX |
(3801) 0x4c49b5 JMP 4c49f5 |
(3801) 0x4c49b7 MOV %R13,%RDI |
(3801) 0x4c49ba VZEROUPPER |
(3801) 0x4c49bd CALL 4d5200 <hypre_Free> |
(3801) 0x4c49c2 MOV -0x88(%RBP),%RDI |
(3801) 0x4c49c9 CALL 4d5200 <hypre_Free> |
(3801) 0x4c49ce MOVQ $0,-0x88(%RBP) |
(3801) 0x4c49d9 MOV -0x38(%RBP),%RDX |
(3801) 0x4c49dd MOV -0x50(%RBP),%RSI |
(3801) 0x4c49e1 MOV -0x30(%RBP),%RDI |
(3801) 0x4c49e5 VPCMPEQD %YMM2,%YMM2,%YMM2 |
(3801) 0x4c49e9 JMP 4c3f1c |
0x4c49ee XCHG %AX,%AX |
(3816) 0x4c49f0 CMP %RAX,%RDX |
(3816) 0x4c49f3 JE 4c49b7 |
(3816) 0x4c49f5 MOV (%R13,%RAX,8),%RCX |
(3816) 0x4c49fa MOV %RCX,(%RDI,%RAX,8) |
(3816) 0x4c49fe VMOVQ (%R8,%RAX,8),%XMM0 |
(3816) 0x4c4a04 VMOVQ %XMM0,(%R10,%RAX,8) |
(3816) 0x4c4a0a INC %RAX |
(3816) 0x4c4a0d JMP 4c49f0 |
0x4c4a0f ADD $0x98,%RSP |
0x4c4a16 POP %RBX |
0x4c4a17 POP %R12 |
0x4c4a19 POP %R13 |
0x4c4a1b POP %R14 |
0x4c4a1d POP %R15 |
0x4c4a1f POP %RBP |
0x4c4a20 VZEROUPPER |
0x4c4a23 RET |
0x4c4a24 NOPW %CS:(%RAX,%RAX,1) |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►100.00+ | __kmp_invoke_microtask | libiomp5.so | |
○ | __kmp_invoke_task_func | libiomp5.so |
Path / |
Source file and lines | IJMatrix_parcsr.c:3240-3484 |
Module | exec |
nb instructions | 163 |
nb uops | 176 |
loop length | 663 |
used x86 registers | 15 |
used mmx registers | 0 |
used xmm registers | 2 |
used ymm registers | 2 |
used zmm registers | 0 |
nb stack references | 11 |
micro-operation queue | 29.33 cycles |
front end | 29.33 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 12.30 | 12.20 | 9.00 | 9.00 | 9.50 | 12.20 | 12.10 | 9.50 | 9.50 | 9.50 | 12.20 | 9.00 |
cycles | 12.30 | 16.80 | 9.00 | 9.00 | 9.50 | 12.20 | 12.10 | 9.50 | 9.50 | 9.50 | 12.20 | 9.00 |
Cycles executing div or sqrt instructions | 16.00 |
FE+BE cycles | 28.02-28.08 |
Stall cycles | 0.00 |
Front-end | 29.33 |
Dispatch | 16.80 |
DIV/SQRT | 16.00 |
Overall L1 | 29.33 |
all | 22% |
load | NA (no load vectorizable/vectorized instructions) |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 100% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 27% |
all | 15% |
load | NA (no load vectorizable/vectorized instructions) |
store | 10% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 25% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 9% |
other | 16% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
SUB $0x98,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R9,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %R8,-0xb0(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RCX,-0x38(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RDX,%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RDI,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL 4d6de0 <hypre_NumActiveThreads> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV %RAX,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL 4d6df0 <hypre_GetThreadNum> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV %RAX,%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %R13,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
OR %R15,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
SHR $0x20,%RAX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
JE 4c3cae <hypre_IJMatrixSetValuesOMPParCSR.extracted.28+0x4e> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R15,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CQTO | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
IDIV %R13 | 5 | 0 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 11-16 | 10 |
JMP 4c3cb6 <hypre_IJMatrixSetValuesOMPParCSR.extracted.28+0x56> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
MOV %R15D,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
DIV %R13D | 4 | 0 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 11-16 | 6 |
MOV 0xb8(%RBP),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA 0x1(%R14),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP %RDX,%R14 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV -0x38(%RBP),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R12,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
JGE 4c3cde <hypre_IJMatrixSetValuesOMPParCSR.extracted.28+0x7e> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
LEA 0x1(%RAX),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RDI,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
IMUL %R14,%RSI | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
IMUL %RCX,%RDI | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
JMP 4c3cf2 <hypre_IJMatrixSetValuesOMPParCSR.extracted.28+0x92> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
MOV %RAX,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
IMUL %R14,%RSI | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
ADD %RDX,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
IMUL %RAX,%RCX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
ADD %RDX,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RCX,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOVQ $0,(%R15,%R14,8) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RSI,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CMP %RDI,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RDI,-0x30(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JGE 4c3ddf <hypre_IJMatrixSetValuesOMPParCSR.extracted.28+0x17f> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
LEA (%R15,%R14,8),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA -0x8(%R8,%RDI,8),%RDI | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
CMP %RDX,%R14 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMOVL %R14,%RDX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
IMUL %R14,%RAX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
LEA (%RDX,%RAX,1),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP %RCX,%RDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JB 4c3d54 <hypre_IJMatrixSetValuesOMPParCSR.extracted.28+0xf4> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
LEA (%R8,%RSI,8),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP %RDI,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JB 4c3d54 <hypre_IJMatrixSetValuesOMPParCSR.extracted.28+0xf4> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R12,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV -0x30(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 4c3ddf <hypre_IJMatrixSetValuesOMPParCSR.extracted.28+0x17f> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
MOV -0x30(%RBP),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SUB %RSI,%R8 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R8,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
AND $-0x4,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
JE 4c3db6 <hypre_IJMatrixSetValuesOMPParCSR.extracted.28+0x156> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
LEA -0x1(%RSI),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV -0x38(%RBP),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%R9,%R12,8),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPXOR %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %R10D,%R10D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VEXTRACTI128 $0x1,%YMM0,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPADDQ %XMM1,%XMM0,%XMM0 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VPSHUFD $-0x12,%XMM0,%XMM1 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VPADDQ %XMM1,%XMM0,%XMM0 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VMOVQ %XMM0,%RDI | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
CMP %RSI,%R8 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV -0x38(%RBP),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x30(%RBP),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JNE 4c3dc2 <hypre_IJMatrixSetValuesOMPParCSR.extracted.28+0x162> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
JMP 4c3ddc <hypre_IJMatrixSetValuesOMPParCSR.extracted.28+0x17c> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
XOR %ESI,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %EDI,%EDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV -0x38(%RBP),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x30(%RBP),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD %RDX,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
ADD %RAX,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RDI,(%RCX) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV (%R13),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV $0x719570,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
CALL 40fef0 <__kmpc_barrier@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
TEST %R14,%R14 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JNE 4c3e94 <hypre_IJMatrixSetValuesOMPParCSR.extracted.28+0x234> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV 0xc8(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
TEST %RAX,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JLE 4c3e94 <hypre_IJMatrixSetValuesOMPParCSR.extracted.28+0x234> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
CMP $0x8,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JB 4c3e67 <hypre_IJMatrixSetValuesOMPParCSR.extracted.28+0x207> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RAX,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SHR $0x3,%RCX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
MOV (%R15),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA 0x40(%R15),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RAX,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
AND $-0x8,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
CMP %RAX,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JE 4c3e94 <hypre_IJMatrixSetValuesOMPParCSR.extracted.28+0x234> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV (%R15,%RCX,8),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV (%R13),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV $0x719590,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CALL 40fef0 <__kmpc_barrier@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
TEST %R14,%R14 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JE 4c3eb2 <hypre_IJMatrixSetValuesOMPParCSR.extracted.28+0x252> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0x8(%R15,%R14,8),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,-0x60(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JMP 4c3eba <hypre_IJMatrixSetValuesOMPParCSR.extracted.28+0x25a> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
MOVQ $0,-0x60(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV -0x38(%RBP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R12,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV -0x30(%RBP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CMP %RDI,%R12 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 4c4a0f <hypre_IJMatrixSetValuesOMPParCSR.extracted.28+0xdaf> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV 0xb0(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x98(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%RAX,%R14,8),%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA -0x1(%RCX),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SHR $0x1,%RAX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
MOV %RAX,-0xb8(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VPCMPEQD %YMM2,%YMM2,%YMM2 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
MOV 0x20(%RBP),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVQ $0,-0x88(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JMP 4c3f2c <hypre_IJMatrixSetValuesOMPParCSR.extracted.28+0x2cc> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XCHG %AX,%AX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
ADD $0x98,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Source file and lines | IJMatrix_parcsr.c:3240-3484 |
Module | exec |
nb instructions | 163 |
nb uops | 176 |
loop length | 663 |
used x86 registers | 15 |
used mmx registers | 0 |
used xmm registers | 2 |
used ymm registers | 2 |
used zmm registers | 0 |
nb stack references | 11 |
micro-operation queue | 29.33 cycles |
front end | 29.33 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 12.30 | 12.20 | 9.00 | 9.00 | 9.50 | 12.20 | 12.10 | 9.50 | 9.50 | 9.50 | 12.20 | 9.00 |
cycles | 12.30 | 16.80 | 9.00 | 9.00 | 9.50 | 12.20 | 12.10 | 9.50 | 9.50 | 9.50 | 12.20 | 9.00 |
Cycles executing div or sqrt instructions | 16.00 |
FE+BE cycles | 28.02-28.08 |
Stall cycles | 0.00 |
Front-end | 29.33 |
Dispatch | 16.80 |
DIV/SQRT | 16.00 |
Overall L1 | 29.33 |
all | 22% |
load | NA (no load vectorizable/vectorized instructions) |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 100% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 27% |
all | 15% |
load | NA (no load vectorizable/vectorized instructions) |
store | 10% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 25% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 9% |
other | 16% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
SUB $0x98,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R9,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %R8,-0xb0(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RCX,-0x38(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RDX,%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RDI,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL 4d6de0 <hypre_NumActiveThreads> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV %RAX,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL 4d6df0 <hypre_GetThreadNum> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV %RAX,%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %R13,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
OR %R15,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
SHR $0x20,%RAX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
JE 4c3cae <hypre_IJMatrixSetValuesOMPParCSR.extracted.28+0x4e> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R15,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CQTO | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
IDIV %R13 | 5 | 0 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 11-16 | 10 |
JMP 4c3cb6 <hypre_IJMatrixSetValuesOMPParCSR.extracted.28+0x56> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
MOV %R15D,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
DIV %R13D | 4 | 0 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 11-16 | 6 |
MOV 0xb8(%RBP),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA 0x1(%R14),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP %RDX,%R14 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV -0x38(%RBP),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R12,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
JGE 4c3cde <hypre_IJMatrixSetValuesOMPParCSR.extracted.28+0x7e> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
LEA 0x1(%RAX),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RDI,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
IMUL %R14,%RSI | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
IMUL %RCX,%RDI | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
JMP 4c3cf2 <hypre_IJMatrixSetValuesOMPParCSR.extracted.28+0x92> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
MOV %RAX,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
IMUL %R14,%RSI | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
ADD %RDX,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
IMUL %RAX,%RCX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
ADD %RDX,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RCX,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOVQ $0,(%R15,%R14,8) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RSI,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CMP %RDI,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RDI,-0x30(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JGE 4c3ddf <hypre_IJMatrixSetValuesOMPParCSR.extracted.28+0x17f> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
LEA (%R15,%R14,8),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA -0x8(%R8,%RDI,8),%RDI | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
CMP %RDX,%R14 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMOVL %R14,%RDX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
IMUL %R14,%RAX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
LEA (%RDX,%RAX,1),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP %RCX,%RDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JB 4c3d54 <hypre_IJMatrixSetValuesOMPParCSR.extracted.28+0xf4> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
LEA (%R8,%RSI,8),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP %RDI,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JB 4c3d54 <hypre_IJMatrixSetValuesOMPParCSR.extracted.28+0xf4> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R12,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV -0x30(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 4c3ddf <hypre_IJMatrixSetValuesOMPParCSR.extracted.28+0x17f> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
MOV -0x30(%RBP),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SUB %RSI,%R8 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R8,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
AND $-0x4,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
JE 4c3db6 <hypre_IJMatrixSetValuesOMPParCSR.extracted.28+0x156> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
LEA -0x1(%RSI),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV -0x38(%RBP),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%R9,%R12,8),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPXOR %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %R10D,%R10D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VEXTRACTI128 $0x1,%YMM0,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPADDQ %XMM1,%XMM0,%XMM0 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VPSHUFD $-0x12,%XMM0,%XMM1 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VPADDQ %XMM1,%XMM0,%XMM0 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VMOVQ %XMM0,%RDI | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
CMP %RSI,%R8 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV -0x38(%RBP),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x30(%RBP),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JNE 4c3dc2 <hypre_IJMatrixSetValuesOMPParCSR.extracted.28+0x162> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
JMP 4c3ddc <hypre_IJMatrixSetValuesOMPParCSR.extracted.28+0x17c> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
XOR %ESI,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %EDI,%EDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV -0x38(%RBP),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x30(%RBP),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD %RDX,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
ADD %RAX,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RDI,(%RCX) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV (%R13),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV $0x719570,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
CALL 40fef0 <__kmpc_barrier@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
TEST %R14,%R14 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JNE 4c3e94 <hypre_IJMatrixSetValuesOMPParCSR.extracted.28+0x234> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV 0xc8(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
TEST %RAX,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JLE 4c3e94 <hypre_IJMatrixSetValuesOMPParCSR.extracted.28+0x234> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
CMP $0x8,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JB 4c3e67 <hypre_IJMatrixSetValuesOMPParCSR.extracted.28+0x207> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RAX,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SHR $0x3,%RCX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
MOV (%R15),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA 0x40(%R15),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RAX,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
AND $-0x8,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
CMP %RAX,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JE 4c3e94 <hypre_IJMatrixSetValuesOMPParCSR.extracted.28+0x234> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV (%R15,%RCX,8),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV (%R13),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV $0x719590,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CALL 40fef0 <__kmpc_barrier@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
TEST %R14,%R14 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JE 4c3eb2 <hypre_IJMatrixSetValuesOMPParCSR.extracted.28+0x252> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0x8(%R15,%R14,8),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,-0x60(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JMP 4c3eba <hypre_IJMatrixSetValuesOMPParCSR.extracted.28+0x25a> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
MOVQ $0,-0x60(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV -0x38(%RBP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R12,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV -0x30(%RBP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CMP %RDI,%R12 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 4c4a0f <hypre_IJMatrixSetValuesOMPParCSR.extracted.28+0xdaf> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV 0xb0(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x98(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%RAX,%R14,8),%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA -0x1(%RCX),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SHR $0x1,%RAX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
MOV %RAX,-0xb8(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VPCMPEQD %YMM2,%YMM2,%YMM2 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
MOV 0x20(%RBP),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVQ $0,-0x88(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JMP 4c3f2c <hypre_IJMatrixSetValuesOMPParCSR.extracted.28+0x2cc> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XCHG %AX,%AX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
ADD $0x98,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼hypre_IJMatrixSetValuesOMPParCSR.extracted.28– | 0.64 | 0.1 |
▼Loop 3801 - IJMatrix_parcsr.c:3262-3484 - exec– | 0.11 | 0.02 |
▼Loop 3808 - IJMatrix_parcsr.c:3262-3454 - exec– | 0.53 | 0.07 |
○Loop 3812 - IJMatrix_parcsr.c:3422-3424 - exec | 0 | 0 |
○Loop 3814 - IJMatrix_parcsr.c:3422-3424 - exec | 0 | 0 |
○Loop 3809 - IJMatrix_parcsr.c:3388-3390 - exec | 0 | 0 |
○Loop 3813 - IJMatrix_parcsr.c:3422-3424 - exec | 0 | 0 |
○Loop 3810 - IJMatrix_parcsr.c:3388-3424 - exec | 0 | 0 |
○Loop 3811 - IJMatrix_parcsr.c:3388-3390 - exec | 0 | 0 |
▼Loop 3818 - IJMatrix_parcsr.c:3262-3337 - exec– | 0 | 0 |
○Loop 3820 - IJMatrix_parcsr.c:3318-3320 - exec | 0 | 0 |
○Loop 3821 - IJMatrix_parcsr.c:3318-3320 - exec | 0 | 0 |
○Loop 3819 - IJMatrix_parcsr.c:3318-3320 - exec | 0 | 0 |
○Loop 3815 - IJMatrix_parcsr.c:3359-3362 - exec | 0 | 0 |
○Loop 3816 - IJMatrix_parcsr.c:3359-3482 - exec | 0 | 0 |
○Loop 3817 - IJMatrix_parcsr.c:3359-3362 - exec | 0 | 0 |
▼Loop 3802 - IJMatrix_parcsr.c:3262-3484 - exec– | 0 | 0 |
▼Loop 3803 - IJMatrix_parcsr.c:3475-3484 - exec– | 0 | 0 |
○Loop 3804 - IJMatrix_parcsr.c:3478-3484 - exec | 0 | 0 |
▼Loop 3805 - IJMatrix_parcsr.c:3262-3484 - exec– | 0 | 0 |
○Loop 3807 - IJMatrix_parcsr.c:3262-3482 - exec | 0 | 0 |
○Loop 3806 - IJMatrix_parcsr.c:3262-3482 - exec | 0 | 0 |
○Loop 3824 - IJMatrix_parcsr.c:3274-3275 - exec | 0 | 0 |
○Loop 3823 - IJMatrix_parcsr.c:3282-3283 - exec | 0 | 0 |
○Loop 3822 - IJMatrix_parcsr.c:3282-3283 - exec | 0 | 0 |
○Loop 3825 - IJMatrix_parcsr.c:3274-3275 - exec | 0 | 0 |
○Loop 3826 - IJMatrix_parcsr.c:3274-3275 - exec | 0 | 0 |