Function: hypre_CSRMatrixMatvecT._omp_fn.3 | Module: exec | Source: csr_matvec.c:554-579 [...] | Coverage: 3.27% |
---|
Function: hypre_CSRMatrixMatvecT._omp_fn.3 | Module: exec | Source: csr_matvec.c:554-579 [...] | Coverage: 3.27% |
---|
/scratch_na/users/xoserete/qaas_runs/171-587-0261/intel/AMG/build/AMG/AMG/seq_mv/csr_matvec.c: 554 - 579 |
-------------------------------------------------------------------------------- |
554: #pragma omp parallel private(i,jj,j,my_thread_num,offset) |
555: #endif |
556: { |
557: my_thread_num = hypre_GetThreadNum(); |
558: offset = y_size*my_thread_num; |
[...] |
564: for (jj = A_i[i]; jj < A_i[i+1]; jj++) |
565: { |
566: j = A_j[jj]; |
567: y_data_expand[offset + j] += A_data[jj] * x_data[i]; |
[...] |
577: for (j = 0; j < num_threads; j++) |
578: { |
579: y_data[i] += y_data_expand[j*y_size + i]; |
0x5a2220 PUSH %RBP |
0x5a2221 MOV %RSP,%RBP |
0x5a2224 PUSH %R15 |
0x5a2226 PUSH %R14 |
0x5a2228 PUSH %R13 |
0x5a222a PUSH %R12 |
0x5a222c PUSH %RBX |
0x5a222d SUB $0x38,%RSP |
0x5a2231 MOV 0x28(%RDI),%RDX |
0x5a2235 MOV 0x10(%RDI),%RSI |
0x5a2239 MOV 0x20(%RDI),%R10 |
0x5a223d MOV 0x30(%RDI),%RAX |
0x5a2241 MOV 0x18(%RDI),%R15 |
0x5a2245 MOV 0x40(%RDI),%R13 |
0x5a2249 MOV %RDX,-0x60(%RBP) |
0x5a224d MOV 0x38(%RDI),%RBX |
0x5a2251 MOV 0x8(%RDI),%R14 |
0x5a2255 MOV %RSI,-0x50(%RBP) |
0x5a2259 MOV (%RDI),%RDI |
0x5a225c MOV %R10,-0x58(%RBP) |
0x5a2260 MOV %RAX,-0x40(%RBP) |
0x5a2264 MOV %RDI,-0x48(%RBP) |
0x5a2268 CALL 5b39c0 <hypre_GetThreadNum> |
0x5a226d MOV %RAX,%R12 |
0x5a2270 CALL 4110b0 <omp_get_num_threads@plt> |
0x5a2275 CLTQ |
0x5a2277 MOV %RAX,-0x38(%RBP) |
0x5a227b CALL 4111f0 <omp_get_thread_num@plt> |
0x5a2280 MOV -0x48(%RBP),%R11 |
0x5a2284 MOV -0x50(%RBP),%RSI |
0x5a2288 MOVSXD %EAX,%R9 |
0x5a228b MOV %R15,%RAX |
0x5a228e MOV -0x58(%RBP),%R8 |
0x5a2292 CQTO |
0x5a2294 IDIVQ -0x38(%RBP) |
0x5a2298 CMP %RDX,%R9 |
0x5a229b MOV %RAX,%RCX |
0x5a229e JL 5a26e0 |
0x5a22a4 MOV %RCX,%R10 |
0x5a22a7 IMUL %R9,%R10 |
0x5a22ab ADD %R10,%RDX |
0x5a22ae ADD %RDX,%RCX |
0x5a22b1 CMP %RCX,%RDX |
0x5a22b4 JGE 5a253f |
0x5a22ba MOV -0x40(%RBP),%R15 |
0x5a22be SAL $0x3,%RDX |
0x5a22c2 LEA (%R8,%RCX,8),%RDI |
0x5a22c6 ADD %RDX,%R14 |
0x5a22c9 ADD %R8,%RDX |
0x5a22cc IMUL %R15,%R12 |
(3101) 0x5a22d0 MOV (%R14),%RAX |
(3101) 0x5a22d3 MOV 0x8(%R14),%R8 |
(3101) 0x5a22d7 CMP %R8,%RAX |
(3101) 0x5a22da JGE 5a252e |
(3101) 0x5a22e0 MOV %R8,%RCX |
(3101) 0x5a22e3 SUB %RAX,%RCX |
(3101) 0x5a22e6 AND $0x7,%ECX |
(3101) 0x5a22e9 JE 5a2419 |
(3101) 0x5a22ef CMP $0x1,%RCX |
(3101) 0x5a22f3 JE 5a23ed |
(3101) 0x5a22f9 CMP $0x2,%RCX |
(3101) 0x5a22fd JE 5a23cc |
(3101) 0x5a2303 CMP $0x3,%RCX |
(3101) 0x5a2307 JE 5a23a9 |
(3101) 0x5a230d CMP $0x4,%RCX |
(3101) 0x5a2311 JE 5a2386 |
(3101) 0x5a2313 CMP $0x5,%RCX |
(3101) 0x5a2317 JE 5a2365 |
(3101) 0x5a2319 CMP $0x6,%RCX |
(3101) 0x5a231d JE 5a2342 |
(3101) 0x5a231f MOV (%RSI,%RAX,8),%R10 |
(3101) 0x5a2323 VMOVSD (%R11,%RAX,8),%XMM0 |
(3101) 0x5a2329 INC %RAX |
(3101) 0x5a232c ADD %R12,%R10 |
(3101) 0x5a232f LEA (%RBX,%R10,8),%R15 |
(3101) 0x5a2333 VMOVSD (%R15),%XMM7 |
(3101) 0x5a2338 VFMADD132SD (%RDX),%XMM7,%XMM0 |
(3101) 0x5a233d VMOVSD %XMM0,(%R15) |
(3101) 0x5a2342 MOV (%RSI,%RAX,8),%RCX |
(3101) 0x5a2346 VMOVSD (%R11,%RAX,8),%XMM1 |
(3101) 0x5a234c INC %RAX |
(3101) 0x5a234f ADD %R12,%RCX |
(3101) 0x5a2352 LEA (%RBX,%RCX,8),%R10 |
(3101) 0x5a2356 VMOVSD (%R10),%XMM2 |
(3101) 0x5a235b VFMADD132SD (%RDX),%XMM2,%XMM1 |
(3101) 0x5a2360 VMOVSD %XMM1,(%R10) |
(3101) 0x5a2365 MOV (%RSI,%RAX,8),%R15 |
(3101) 0x5a2369 VMOVSD (%R11,%RAX,8),%XMM3 |
(3101) 0x5a236f INC %RAX |
(3101) 0x5a2372 ADD %R12,%R15 |
(3101) 0x5a2375 LEA (%RBX,%R15,8),%RCX |
(3101) 0x5a2379 VMOVSD (%RCX),%XMM6 |
(3101) 0x5a237d VFMADD132SD (%RDX),%XMM6,%XMM3 |
(3101) 0x5a2382 VMOVSD %XMM3,(%RCX) |
(3101) 0x5a2386 MOV (%RSI,%RAX,8),%R10 |
(3101) 0x5a238a VMOVSD (%R11,%RAX,8),%XMM4 |
(3101) 0x5a2390 INC %RAX |
(3101) 0x5a2393 ADD %R12,%R10 |
(3101) 0x5a2396 LEA (%RBX,%R10,8),%R15 |
(3101) 0x5a239a VMOVSD (%R15),%XMM5 |
(3101) 0x5a239f VFMADD132SD (%RDX),%XMM5,%XMM4 |
(3101) 0x5a23a4 VMOVSD %XMM4,(%R15) |
(3101) 0x5a23a9 MOV (%RSI,%RAX,8),%RCX |
(3101) 0x5a23ad VMOVSD (%R11,%RAX,8),%XMM8 |
(3101) 0x5a23b3 INC %RAX |
(3101) 0x5a23b6 ADD %R12,%RCX |
(3101) 0x5a23b9 LEA (%RBX,%RCX,8),%R10 |
(3101) 0x5a23bd VMOVSD (%R10),%XMM9 |
(3101) 0x5a23c2 VFMADD132SD (%RDX),%XMM9,%XMM8 |
(3101) 0x5a23c7 VMOVSD %XMM8,(%R10) |
(3101) 0x5a23cc MOV (%RSI,%RAX,8),%R15 |
(3101) 0x5a23d0 VMOVSD (%R11,%RAX,8),%XMM10 |
(3101) 0x5a23d6 INC %RAX |
(3101) 0x5a23d9 ADD %R12,%R15 |
(3101) 0x5a23dc LEA (%RBX,%R15,8),%RCX |
(3101) 0x5a23e0 VMOVSD (%RCX),%XMM11 |
(3101) 0x5a23e4 VFMADD132SD (%RDX),%XMM11,%XMM10 |
(3101) 0x5a23e9 VMOVSD %XMM10,(%RCX) |
(3101) 0x5a23ed MOV (%RSI,%RAX,8),%R10 |
(3101) 0x5a23f1 VMOVSD (%R11,%RAX,8),%XMM12 |
(3101) 0x5a23f7 INC %RAX |
(3101) 0x5a23fa ADD %R12,%R10 |
(3101) 0x5a23fd LEA (%RBX,%R10,8),%R15 |
(3101) 0x5a2401 VMOVSD (%R15),%XMM13 |
(3101) 0x5a2406 VFMADD132SD (%RDX),%XMM13,%XMM12 |
(3101) 0x5a240b VMOVSD %XMM12,(%R15) |
(3101) 0x5a2410 CMP %R8,%RAX |
(3101) 0x5a2413 JE 5a252e |
(3102) 0x5a2419 MOV (%RSI,%RAX,8),%RCX |
(3102) 0x5a241d VMOVSD (%R11,%RAX,8),%XMM14 |
(3102) 0x5a2423 MOV 0x8(%RSI,%RAX,8),%R15 |
(3102) 0x5a2428 ADD %R12,%RCX |
(3102) 0x5a242b LEA (%RBX,%RCX,8),%R10 |
(3102) 0x5a242f ADD %R12,%R15 |
(3102) 0x5a2432 VMOVSD (%R10),%XMM15 |
(3102) 0x5a2437 LEA (%RBX,%R15,8),%RCX |
(3102) 0x5a243b VFMADD132SD (%RDX),%XMM15,%XMM14 |
(3102) 0x5a2440 VMOVSD %XMM14,(%R10) |
(3102) 0x5a2445 MOV 0x10(%RSI,%RAX,8),%R10 |
(3102) 0x5a244a VMOVSD 0x8(%R11,%RAX,8),%XMM0 |
(3102) 0x5a2451 VMOVSD (%RCX),%XMM7 |
(3102) 0x5a2455 ADD %R12,%R10 |
(3102) 0x5a2458 VFMADD132SD (%RDX),%XMM7,%XMM0 |
(3102) 0x5a245d LEA (%RBX,%R10,8),%R15 |
(3102) 0x5a2461 VMOVSD %XMM0,(%RCX) |
(3102) 0x5a2465 MOV 0x18(%RSI,%RAX,8),%RCX |
(3102) 0x5a246a VMOVSD 0x10(%R11,%RAX,8),%XMM1 |
(3102) 0x5a2471 VMOVSD (%R15),%XMM2 |
(3102) 0x5a2476 ADD %R12,%RCX |
(3102) 0x5a2479 VFMADD132SD (%RDX),%XMM2,%XMM1 |
(3102) 0x5a247e LEA (%RBX,%RCX,8),%R10 |
(3102) 0x5a2482 VMOVSD %XMM1,(%R15) |
(3102) 0x5a2487 MOV 0x20(%RSI,%RAX,8),%R15 |
(3102) 0x5a248c VMOVSD 0x18(%R11,%RAX,8),%XMM3 |
(3102) 0x5a2493 VMOVSD (%R10),%XMM6 |
(3102) 0x5a2498 ADD %R12,%R15 |
(3102) 0x5a249b VFMADD132SD (%RDX),%XMM6,%XMM3 |
(3102) 0x5a24a0 LEA (%RBX,%R15,8),%RCX |
(3102) 0x5a24a4 VMOVSD %XMM3,(%R10) |
(3102) 0x5a24a9 MOV 0x28(%RSI,%RAX,8),%R10 |
(3102) 0x5a24ae VMOVSD 0x20(%R11,%RAX,8),%XMM4 |
(3102) 0x5a24b5 VMOVSD (%RCX),%XMM5 |
(3102) 0x5a24b9 ADD %R12,%R10 |
(3102) 0x5a24bc VFMADD132SD (%RDX),%XMM5,%XMM4 |
(3102) 0x5a24c1 LEA (%RBX,%R10,8),%R15 |
(3102) 0x5a24c5 VMOVSD %XMM4,(%RCX) |
(3102) 0x5a24c9 MOV 0x30(%RSI,%RAX,8),%RCX |
(3102) 0x5a24ce VMOVSD 0x28(%R11,%RAX,8),%XMM8 |
(3102) 0x5a24d5 VMOVSD (%R15),%XMM9 |
(3102) 0x5a24da ADD %R12,%RCX |
(3102) 0x5a24dd VFMADD132SD (%RDX),%XMM9,%XMM8 |
(3102) 0x5a24e2 LEA (%RBX,%RCX,8),%R10 |
(3102) 0x5a24e6 VMOVSD %XMM8,(%R15) |
(3102) 0x5a24eb VMOVSD 0x30(%R11,%RAX,8),%XMM10 |
(3102) 0x5a24f2 VMOVSD (%R10),%XMM11 |
(3102) 0x5a24f7 MOV 0x38(%RSI,%RAX,8),%R15 |
(3102) 0x5a24fc VFMADD132SD (%RDX),%XMM11,%XMM10 |
(3102) 0x5a2501 ADD %R12,%R15 |
(3102) 0x5a2504 LEA (%RBX,%R15,8),%RCX |
(3102) 0x5a2508 VMOVSD %XMM10,(%R10) |
(3102) 0x5a250d VMOVSD 0x38(%R11,%RAX,8),%XMM12 |
(3102) 0x5a2514 VMOVSD (%RCX),%XMM13 |
(3102) 0x5a2518 ADD $0x8,%RAX |
(3102) 0x5a251c VFMADD132SD (%RDX),%XMM13,%XMM12 |
(3102) 0x5a2521 VMOVSD %XMM12,(%RCX) |
(3102) 0x5a2525 CMP %R8,%RAX |
(3102) 0x5a2528 JNE 5a2419 |
(3101) 0x5a252e ADD $0x8,%RDX |
(3101) 0x5a2532 ADD $0x8,%R14 |
(3101) 0x5a2536 CMP %RDX,%RDI |
(3101) 0x5a2539 JNE 5a22d0 |
0x5a253f MOV %R9,-0x48(%RBP) |
0x5a2543 CALL 411290 <GOMP_barrier@plt> |
0x5a2548 MOV -0x40(%RBP),%RAX |
0x5a254c MOV -0x48(%RBP),%R14 |
0x5a2550 CQTO |
0x5a2552 IDIVQ -0x38(%RBP) |
0x5a2556 CMP %RDX,%R14 |
0x5a2559 JL 5a26d6 |
0x5a255f IMUL %RAX,%R14 |
0x5a2563 ADD %R14,%RDX |
0x5a2566 ADD %RDX,%RAX |
0x5a2569 CMP %RAX,%RDX |
0x5a256c JGE 5a26c3 |
0x5a2572 TEST %R13,%R13 |
0x5a2575 JLE 5a26c3 |
0x5a257b MOV -0x60(%RBP),%R11 |
0x5a257f LEA (,%RDX,8),%R12 |
0x5a2587 ADD %R12,%R11 |
0x5a258a ADD %RBX,%R12 |
0x5a258d MOV -0x40(%RBP),%RBX |
0x5a2591 SAL $0x3,%RBX |
0x5a2595 NOPL (%RAX) |
(3100) 0x5a2598 MOV %R13,%R9 |
(3100) 0x5a259b VMOVSD (%R11),%XMM14 |
(3100) 0x5a25a0 MOV %R12,%RDI |
(3100) 0x5a25a3 XOR %ESI,%ESI |
(3100) 0x5a25a5 AND $0x7,%R9D |
(3100) 0x5a25a9 JE 5a2646 |
(3100) 0x5a25af CMP $0x1,%R9 |
(3100) 0x5a25b3 JE 5a2632 |
(3100) 0x5a25b5 CMP $0x2,%R9 |
(3100) 0x5a25b9 JE 5a2623 |
(3100) 0x5a25bb CMP $0x3,%R9 |
(3100) 0x5a25bf JE 5a2614 |
(3100) 0x5a25c1 CMP $0x4,%R9 |
(3100) 0x5a25c5 JE 5a2605 |
(3100) 0x5a25c7 CMP $0x5,%R9 |
(3100) 0x5a25cb JE 5a25f6 |
(3100) 0x5a25cd CMP $0x6,%R9 |
(3100) 0x5a25d1 JE 5a25e7 |
(3100) 0x5a25d3 VADDSD (%R12),%XMM14,%XMM14 |
(3100) 0x5a25d9 MOV $0x1,%ESI |
(3100) 0x5a25de LEA (%R12,%RBX,1),%RDI |
(3100) 0x5a25e2 VMOVSD %XMM14,(%R11) |
(3100) 0x5a25e7 VADDSD (%RDI),%XMM14,%XMM14 |
(3100) 0x5a25eb INC %RSI |
(3100) 0x5a25ee ADD %RBX,%RDI |
(3100) 0x5a25f1 VMOVSD %XMM14,(%R11) |
(3100) 0x5a25f6 VADDSD (%RDI),%XMM14,%XMM14 |
(3100) 0x5a25fa INC %RSI |
(3100) 0x5a25fd ADD %RBX,%RDI |
(3100) 0x5a2600 VMOVSD %XMM14,(%R11) |
(3100) 0x5a2605 VADDSD (%RDI),%XMM14,%XMM14 |
(3100) 0x5a2609 INC %RSI |
(3100) 0x5a260c ADD %RBX,%RDI |
(3100) 0x5a260f VMOVSD %XMM14,(%R11) |
(3100) 0x5a2614 VADDSD (%RDI),%XMM14,%XMM14 |
(3100) 0x5a2618 INC %RSI |
(3100) 0x5a261b ADD %RBX,%RDI |
(3100) 0x5a261e VMOVSD %XMM14,(%R11) |
(3100) 0x5a2623 VADDSD (%RDI),%XMM14,%XMM14 |
(3100) 0x5a2627 INC %RSI |
(3100) 0x5a262a ADD %RBX,%RDI |
(3100) 0x5a262d VMOVSD %XMM14,(%R11) |
(3100) 0x5a2632 VADDSD (%RDI),%XMM14,%XMM14 |
(3100) 0x5a2636 INC %RSI |
(3100) 0x5a2639 ADD %RBX,%RDI |
(3100) 0x5a263c VMOVSD %XMM14,(%R11) |
(3100) 0x5a2641 CMP %RSI,%R13 |
(3100) 0x5a2644 JE 5a26af |
(3099) 0x5a2646 VADDSD (%RDI),%XMM14,%XMM15 |
(3099) 0x5a264a ADD %RBX,%RDI |
(3099) 0x5a264d ADD $0x8,%RSI |
(3099) 0x5a2651 VMOVSD %XMM15,(%R11) |
(3099) 0x5a2656 VADDSD (%RDI),%XMM15,%XMM0 |
(3099) 0x5a265a ADD %RBX,%RDI |
(3099) 0x5a265d VMOVSD %XMM0,(%R11) |
(3099) 0x5a2662 VADDSD (%RDI),%XMM0,%XMM7 |
(3099) 0x5a2666 ADD %RBX,%RDI |
(3099) 0x5a2669 VMOVSD %XMM7,(%R11) |
(3099) 0x5a266e VADDSD (%RDI),%XMM7,%XMM1 |
(3099) 0x5a2672 ADD %RBX,%RDI |
(3099) 0x5a2675 VMOVSD %XMM1,(%R11) |
(3099) 0x5a267a VADDSD (%RDI),%XMM1,%XMM2 |
(3099) 0x5a267e ADD %RBX,%RDI |
(3099) 0x5a2681 VMOVSD %XMM2,(%R11) |
(3099) 0x5a2686 VADDSD (%RDI),%XMM2,%XMM3 |
(3099) 0x5a268a ADD %RBX,%RDI |
(3099) 0x5a268d VMOVSD %XMM3,(%R11) |
(3099) 0x5a2692 VADDSD (%RDI),%XMM3,%XMM6 |
(3099) 0x5a2696 ADD %RBX,%RDI |
(3099) 0x5a2699 VMOVSD %XMM6,(%R11) |
(3099) 0x5a269e VADDSD (%RDI),%XMM6,%XMM14 |
(3099) 0x5a26a2 ADD %RBX,%RDI |
(3099) 0x5a26a5 VMOVSD %XMM14,(%R11) |
(3099) 0x5a26aa CMP %RSI,%R13 |
(3099) 0x5a26ad JNE 5a2646 |
(3100) 0x5a26af INC %RDX |
(3100) 0x5a26b2 ADD $0x8,%R11 |
(3100) 0x5a26b6 ADD $0x8,%R12 |
(3100) 0x5a26ba CMP %RDX,%RAX |
(3100) 0x5a26bd JNE 5a2598 |
0x5a26c3 ADD $0x38,%RSP |
0x5a26c7 POP %RBX |
0x5a26c8 POP %R12 |
0x5a26ca POP %R13 |
0x5a26cc POP %R14 |
0x5a26ce POP %R15 |
0x5a26d0 POP %RBP |
0x5a26d1 JMP 411290 |
0x5a26d6 INC %RAX |
0x5a26d9 XOR %EDX,%EDX |
0x5a26db JMP 5a255f |
0x5a26e0 INC %RCX |
0x5a26e3 XOR %EDX,%EDX |
0x5a26e5 JMP 5a22a4 |
0x5a26ea NOPW (%RAX,%RAX,1) |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
○98.18 | gomp_thread_start | team.c:130 | libgomp.so.1.0.0 |
○1.82 | GOMP_parallel | libgomp.h:985 | libgomp.so.1.0.0 |
Path / |
Source file and lines | csr_matvec.c:554-579 |
Module | exec |
nb instructions | 87 |
nb uops | 99 |
loop length | 310 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 0 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 6 |
micro-operation queue | 16.50 cycles |
front end | 16.50 cycles |
Metric | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 6.10 | 9.00 | 8.33 | 8.33 | 8.50 | 6.00 | 5.90 | 8.50 | 8.50 | 8.50 | 6.00 | 8.33 |
cycles | 6.10 | 12.43 | 8.33 | 8.33 | 8.50 | 6.00 | 5.90 | 8.50 | 8.50 | 8.50 | 6.00 | 8.33 |
Cycles executing div or sqrt instructions | 20.00 |
FE+BE cycles | 20.18-20.23 |
Stall cycles | 4.18-4.25 |
ROB full (events) | 3.47-0.00 |
PRF_INT full (events) | 1.72-4.40 |
Front-end | 16.50 |
Dispatch | 12.43 |
DIV/SQRT | 20.00 |
Overall L1 | 20.00 |
all | 0% |
load | 0% |
store | 0% |
mul | 0% |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 0% |
all | 12% |
load | 12% |
store | 12% |
mul | 12% |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 12% |
other | 12% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
SUB $0x38,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x28(%RDI),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RDI),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x20(%RDI),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x30(%RDI),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x18(%RDI),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x40(%RDI),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RDX,-0x60(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x38(%RDI),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RDI),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RSI,-0x50(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV (%RDI),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R10,-0x58(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RAX,-0x40(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RDI,-0x48(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CALL 5b39c0 <hypre_GetThreadNum> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV %RAX,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL 4110b0 <omp_get_num_threads@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
CLTQ | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
MOV %RAX,-0x38(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CALL 4111f0 <omp_get_thread_num@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV -0x48(%RBP),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x50(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVSXD %EAX,%R9 | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
MOV %R15,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV -0x58(%RBP),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CQTO | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
IDIVQ -0x38(%RBP) | 5 | 0 | 3 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 11-16 | 10 |
CMP %RDX,%R9 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RAX,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
JL 5a26e0 <hypre_CSRMatrixMatvecT._omp_fn.3+0x4c0> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RCX,%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
IMUL %R9,%R10 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
ADD %R10,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
ADD %RDX,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP %RCX,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 5a253f <hypre_CSRMatrixMatvecT._omp_fn.3+0x31f> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0x40(%RBP),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SAL $0x3,%RDX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
LEA (%R8,%RCX,8),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
ADD %RDX,%R14 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
ADD %R8,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
IMUL %R15,%R12 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV %R9,-0x48(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CALL 411290 <GOMP_barrier@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV -0x40(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x48(%RBP),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CQTO | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
IDIVQ -0x38(%RBP) | 5 | 0 | 3 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 11-16 | 10 |
CMP %RDX,%R14 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JL 5a26d6 <hypre_CSRMatrixMatvecT._omp_fn.3+0x4b6> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
IMUL %RAX,%R14 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
ADD %R14,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
ADD %RDX,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP %RAX,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 5a26c3 <hypre_CSRMatrixMatvecT._omp_fn.3+0x4a3> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
TEST %R13,%R13 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JLE 5a26c3 <hypre_CSRMatrixMatvecT._omp_fn.3+0x4a3> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0x60(%RBP),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (,%RDX,8),%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
ADD %R12,%R11 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
ADD %RBX,%R12 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV -0x40(%RBP),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SAL $0x3,%RBX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
ADD $0x38,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
JMP 411290 <GOMP_barrier@plt> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
INC %RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 5a255f <hypre_CSRMatrixMatvecT._omp_fn.3+0x33f> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
INC %RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 5a22a4 <hypre_CSRMatrixMatvecT._omp_fn.3+0x84> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Source file and lines | csr_matvec.c:554-579 |
Module | exec |
nb instructions | 87 |
nb uops | 99 |
loop length | 310 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 0 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 6 |
micro-operation queue | 16.50 cycles |
front end | 16.50 cycles |
Metric | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 6.10 | 9.00 | 8.33 | 8.33 | 8.50 | 6.00 | 5.90 | 8.50 | 8.50 | 8.50 | 6.00 | 8.33 |
cycles | 6.10 | 12.43 | 8.33 | 8.33 | 8.50 | 6.00 | 5.90 | 8.50 | 8.50 | 8.50 | 6.00 | 8.33 |
Cycles executing div or sqrt instructions | 20.00 |
FE+BE cycles | 20.18-20.23 |
Stall cycles | 4.18-4.25 |
ROB full (events) | 3.47-0.00 |
PRF_INT full (events) | 1.72-4.40 |
Front-end | 16.50 |
Dispatch | 12.43 |
DIV/SQRT | 20.00 |
Overall L1 | 20.00 |
all | 0% |
load | 0% |
store | 0% |
mul | 0% |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 0% |
all | 12% |
load | 12% |
store | 12% |
mul | 12% |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 12% |
other | 12% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
SUB $0x38,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x28(%RDI),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RDI),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x20(%RDI),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x30(%RDI),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x18(%RDI),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x40(%RDI),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RDX,-0x60(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x38(%RDI),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RDI),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RSI,-0x50(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV (%RDI),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R10,-0x58(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RAX,-0x40(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RDI,-0x48(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CALL 5b39c0 <hypre_GetThreadNum> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV %RAX,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL 4110b0 <omp_get_num_threads@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
CLTQ | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
MOV %RAX,-0x38(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CALL 4111f0 <omp_get_thread_num@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV -0x48(%RBP),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x50(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVSXD %EAX,%R9 | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
MOV %R15,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV -0x58(%RBP),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CQTO | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
IDIVQ -0x38(%RBP) | 5 | 0 | 3 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 11-16 | 10 |
CMP %RDX,%R9 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RAX,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
JL 5a26e0 <hypre_CSRMatrixMatvecT._omp_fn.3+0x4c0> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RCX,%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
IMUL %R9,%R10 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
ADD %R10,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
ADD %RDX,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP %RCX,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 5a253f <hypre_CSRMatrixMatvecT._omp_fn.3+0x31f> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0x40(%RBP),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SAL $0x3,%RDX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
LEA (%R8,%RCX,8),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
ADD %RDX,%R14 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
ADD %R8,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
IMUL %R15,%R12 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV %R9,-0x48(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CALL 411290 <GOMP_barrier@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV -0x40(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x48(%RBP),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CQTO | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
IDIVQ -0x38(%RBP) | 5 | 0 | 3 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 11-16 | 10 |
CMP %RDX,%R14 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JL 5a26d6 <hypre_CSRMatrixMatvecT._omp_fn.3+0x4b6> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
IMUL %RAX,%R14 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
ADD %R14,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
ADD %RDX,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP %RAX,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 5a26c3 <hypre_CSRMatrixMatvecT._omp_fn.3+0x4a3> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
TEST %R13,%R13 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JLE 5a26c3 <hypre_CSRMatrixMatvecT._omp_fn.3+0x4a3> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0x60(%RBP),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (,%RDX,8),%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
ADD %R12,%R11 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
ADD %RBX,%R12 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV -0x40(%RBP),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SAL $0x3,%RBX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
ADD $0x38,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
JMP 411290 <GOMP_barrier@plt> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
INC %RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 5a255f <hypre_CSRMatrixMatvecT._omp_fn.3+0x33f> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
INC %RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 5a22a4 <hypre_CSRMatrixMatvecT._omp_fn.3+0x84> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼hypre_CSRMatrixMatvecT._omp_fn.3– | 3.27 | 0.87 |
▼Loop 3101 - csr_matvec.c:564-567 - exec– | 3.13 | 0.55 |
○Loop 3102 - csr_matvec.c:564-567 - exec | 0.04 | 0.01 |
▼Loop 3100 - csr_matvec.c:577-579 - exec– | 0.01 | 0 |
○Loop 3099 - csr_matvec.c:577-579 - exec | 0.1 | 0.02 |