Function: qmcplusplus::TimerType<qmcplusplus::CPUClock>::stop() | Module: exec | Source: NewTimer.cpp:85-120 [...] | Coverage: 0.04% |
---|
Function: qmcplusplus::TimerType<qmcplusplus::CPUClock>::stop() | Module: exec | Source: NewTimer.cpp:85-120 [...] | Coverage: 0.04% |
---|
/home/kcamus/qaas_runs/169-451-1869/intel/miniqmc/build/miniqmc/src/Utilities/NewTimer.cpp: 85 - 120 |
-------------------------------------------------------------------------------- |
85: { |
86: if (active) |
[...] |
99: for (int level = omp_get_level(); level > 0; level--) |
100: if (omp_get_ancestor_thread_num(level) != 0) |
101: is_true_master = false; |
102: if (is_true_master) |
103: { |
104: double elapsed = CLOCK()() - start_time; |
105: total_time += elapsed; |
106: num_calls++; |
107: |
108: per_stack_total_time[current_stack_key] += elapsed; |
109: per_stack_num_calls[current_stack_key] += 1; |
110: |
111: if (manager) |
112: manager->pop_timer(this); |
[...] |
120: } |
/home/kcamus/qaas_runs/169-451-1869/intel/miniqmc/build/miniqmc/src/Utilities/Clock.h: 33 - 33 |
-------------------------------------------------------------------------------- |
33: return omp_get_wtime(); |
0x4650a0 PUSH %RBP |
0x4650a1 MOV %RSP,%RBP |
0x4650a4 PUSH %R15 |
0x4650a6 PUSH %R14 |
0x4650a8 PUSH %R13 |
0x4650aa PUSH %R12 |
0x4650ac PUSH %RBX |
0x4650ad SUB $0x18,%RSP |
0x4650b1 CMPB $0,0x38(%RDI) |
0x4650b5 JE 465234 |
0x4650bb MOV %RDI,%R14 |
0x4650be CALL 404600 <omp_get_level@plt> |
0x4650c3 TEST %EAX,%EAX |
0x4650c5 JLE 4651c7 |
0x4650cb MOV %R14,-0x38(%RBP) |
0x4650cf MOV $0x1,%R12B |
0x4650d2 MOV %RAX,-0x40(%RBP) |
0x4650d6 CMP $0x8,%EAX |
0x4650d9 JB 46517c |
0x4650df MOV -0x40(%RBP),%RAX |
0x4650e3 MOV %EAX,%ECX |
0x4650e5 SHR $0x3,%ECX |
0x4650e8 LEA -0x7(%RAX),%R15D |
0x4650ec MOV $0x1,%R12B |
0x4650ef NOP |
(1245) 0x4650f0 MOV %ECX,-0x30(%RBP) |
(1245) 0x4650f3 LEA 0x7(%R15),%EDI |
(1245) 0x4650f7 CALL 404230 <omp_get_ancestor_thread_num@plt> |
(1245) 0x4650fc MOV %EAX,%EBX |
(1245) 0x4650fe LEA 0x6(%R15),%EDI |
(1245) 0x465102 CALL 404230 <omp_get_ancestor_thread_num@plt> |
(1245) 0x465107 MOV %EAX,%R13D |
(1245) 0x46510a OR %EBX,%R13D |
(1245) 0x46510d LEA 0x5(%R15),%EDI |
(1245) 0x465111 CALL 404230 <omp_get_ancestor_thread_num@plt> |
(1245) 0x465116 MOV %EAX,%EBX |
(1245) 0x465118 LEA 0x4(%R15),%EDI |
(1245) 0x46511c CALL 404230 <omp_get_ancestor_thread_num@plt> |
(1245) 0x465121 MOV %EAX,%R14D |
(1245) 0x465124 OR %EBX,%R14D |
(1245) 0x465127 OR %R13D,%R14D |
(1245) 0x46512a LEA 0x3(%R15),%EDI |
(1245) 0x46512e CALL 404230 <omp_get_ancestor_thread_num@plt> |
(1245) 0x465133 MOV %EAX,%R13D |
(1245) 0x465136 LEA 0x2(%R15),%EDI |
(1245) 0x46513a CALL 404230 <omp_get_ancestor_thread_num@plt> |
(1245) 0x46513f MOV %EAX,%EBX |
(1245) 0x465141 OR %R13D,%EBX |
(1245) 0x465144 OR %R14D,%EBX |
(1245) 0x465147 LEA 0x1(%R15),%EDI |
(1245) 0x46514b CALL 404230 <omp_get_ancestor_thread_num@plt> |
(1245) 0x465150 MOV %EAX,%R14D |
(1245) 0x465153 MOV %R15D,%EDI |
(1245) 0x465156 CALL 404230 <omp_get_ancestor_thread_num@plt> |
(1245) 0x46515b MOV -0x30(%RBP),%ECX |
(1245) 0x46515e OR %R14D,%EAX |
(1245) 0x465161 MOVZX %R12B,%R12D |
(1245) 0x465165 OR %EBX,%EAX |
(1245) 0x465167 MOV $0,%EAX |
(1245) 0x46516c CMOVNE %EAX,%R12D |
(1245) 0x465170 ADD $-0x8,%R15D |
(1245) 0x465174 DEC %ECX |
(1245) 0x465176 JNE 4650f0 |
0x46517c MOV -0x40(%RBP),%RBX |
0x465180 MOV %EBX,%EAX |
0x465182 AND $-0x8,%EAX |
0x465185 CMP %EBX,%EAX |
0x465187 JAE 4651bd |
0x465189 SUB %EAX,%EBX |
0x46518b XOR %R15D,%R15D |
0x46518e MOV -0x38(%RBP),%R14 |
0x465192 NOPW %CS:(%RAX,%RAX,1) |
(1244) 0x4651a0 MOV %EBX,%EDI |
(1244) 0x4651a2 CALL 404230 <omp_get_ancestor_thread_num@plt> |
(1244) 0x4651a7 TEST %EAX,%EAX |
(1244) 0x4651a9 MOVZX %R12B,%R12D |
(1244) 0x4651ad CMOVNE %R15D,%R12D |
(1244) 0x4651b1 DEC %EBX |
(1244) 0x4651b3 JNE 4651a0 |
0x4651b5 TEST $0x1,%R12B |
0x4651b9 JNE 4651c7 |
0x4651bb JMP 465234 |
0x4651bd MOV -0x38(%RBP),%R14 |
0x4651c1 TEST $0x1,%R12B |
0x4651c5 JE 465234 |
0x4651c7 CALL 4044c0 <omp_get_wtime@plt> |
0x4651cc VSUBSD (%R14),%XMM0,%XMM0 |
0x4651d1 VMOVSD %XMM0,-0x30(%RBP) |
0x4651d6 VADDSD 0x8(%R14),%XMM0,%XMM0 |
0x4651dc VMOVSD %XMM0,0x8(%R14) |
0x4651e2 INCQ 0x10(%R14) |
0x4651e6 LEA 0x68(%R14),%RDI |
0x4651ea LEA 0x50(%R14),%RBX |
0x4651ee MOV %RBX,%RSI |
0x4651f1 CALL 465250 <_ZNSt3mapIN11qmcplusplus13StackKeyParamILi2EEEdSt4lessIS2_ESaISt4pairIKS2_dEEEixERS6_> |
0x4651f6 VMOVSD -0x30(%RBP),%XMM0 |
0x4651fb VADDSD (%RAX),%XMM0,%XMM0 |
0x4651ff VMOVSD %XMM0,(%RAX) |
0x465203 LEA 0x98(%R14),%RDI |
0x46520a MOV %RBX,%RSI |
0x46520d CALL 4653b0 <_ZNSt3mapIN11qmcplusplus13StackKeyParamILi2EEElSt4lessIS2_ESaISt4pairIKS2_lEEEixERS6_> |
0x465212 INCQ (%RAX) |
0x465215 MOV 0x48(%R14),%RDI |
0x465219 TEST %RDI,%RDI |
0x46521c JE 465234 |
0x46521e MOV %R14,%RSI |
0x465221 ADD $0x18,%RSP |
0x465225 POP %RBX |
0x465226 POP %R12 |
0x465228 POP %R13 |
0x46522a POP %R14 |
0x46522c POP %R15 |
0x46522e POP %RBP |
0x46522f JMP 46ac70 |
0x465234 ADD $0x18,%RSP |
0x465238 POP %RBX |
0x465239 POP %R12 |
0x46523b POP %R13 |
0x46523d POP %R14 |
0x46523f POP %R15 |
0x465241 POP %RBP |
0x465242 RET |
0x465243 NOPW %CS:(%RAX,%RAX,1) |
0x46524d NOPL (%RAX) |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►33.33+ | qmcplusplus::WaveFunction::rat[...] | NewTimer.h:249 | exec |
○ | main.extracted.104 | refwrap.h:347 | exec |
○ | __kmp_invoke_microtask | libiomp5.so | |
○ | __kmp_fork_call | libiomp5.so | |
○ | __kmpc_fork_call | libiomp5.so | |
○ | main | miniqmc.cpp:404 | exec |
○ | __libc_init_first | libc.so.6 | |
►33.33+ | miniqmcreference::einspline_sp[...] | NewTimer.h:249 | exec |
○ | miniqmcreference::DiracDetermi[...] | DiracDeterminantRef.cpp:194 | exec |
○ | qmcplusplus::WaveFunction::rat[...] | WaveFunction.cpp:214 | exec |
○ | main.extracted.104 | refwrap.h:347 | exec |
○ | __kmp_invoke_microtask | libiomp5.so | |
○ | __kmp_fork_call | libiomp5.so | |
○ | __kmpc_fork_call | libiomp5.so | |
○ | main | miniqmc.cpp:404 | exec |
○ | __libc_init_first | libc.so.6 | |
►16.67+ | miniqmcreference::DiracDetermi[...] | DiracDeterminantRef.cpp:101 | exec |
○ | qmcplusplus::WaveFunction::rat[...] | WaveFunction.cpp:202 | exec |
○ | main.extracted.104 | refwrap.h:347 | exec |
○ | __kmp_invoke_microtask | libiomp5.so | |
○ | __kmp_fork_call | libiomp5.so | |
○ | __kmpc_fork_call | libiomp5.so | |
○ | main | miniqmc.cpp:404 | exec |
○ | __libc_init_first | libc.so.6 | |
►16.67+ | miniqmcreference::DiracDetermi[...] | DiracDeterminantRef.cpp:206 | exec |
○ | qmcplusplus::WaveFunction::rat[...] | WaveFunction.cpp:214 | exec |
○ | main.extracted.104 | refwrap.h:347 | exec |
○ | __kmp_invoke_microtask | libiomp5.so | |
○ | __kmp_fork_call | libiomp5.so | |
○ | __kmpc_fork_call | libiomp5.so | |
○ | main | miniqmc.cpp:404 | exec |
○ | __libc_init_first | libc.so.6 |
Path / |
Source file and lines | NewTimer.cpp:85-120 |
Module | exec |
nb instructions | 79 |
nb uops | 87 |
loop length | 271 |
used x86 registers | 11 |
used mmx registers | 0 |
used xmm registers | 1 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 3 |
micro-operation queue | 21.75 cycles |
front end | 21.75 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | |
---|---|---|---|---|---|---|---|---|
uops | 9.50 | 9.50 | 14.00 | 14.00 | 17.00 | 9.50 | 9.50 | 14.00 |
cycles | 9.50 | 9.50 | 14.00 | 14.00 | 17.00 | 9.50 | 9.50 | 14.00 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 20.48 |
Stall cycles | 0.00 |
Front-end | 21.75 |
Dispatch | 17.00 |
Overall L1 | 21.75 |
all | 0% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 0% |
all | 0% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | NA (no other vectorizable/vectorized instructions) |
all | 0% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
all | 9% |
load | 9% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 8% |
all | 12% |
load | 12% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | NA (no other vectorizable/vectorized instructions) |
all | 10% |
load | 11% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 10% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 8% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
PUSH %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
PUSH %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
PUSH %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
PUSH %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
PUSH %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
SUB $0x18,%RSP | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
CMPB $0,0x38(%RDI) | 1 | 0.25 | 0.25 | 0.50 | 0.50 | 0 | 0.25 | 0.25 | 0 | 1 | 0.50 |
JE 465234 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOV %RDI,%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
CALL 404600 | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 |
TEST %EAX,%EAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JLE 4651c7 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOV %R14,-0x38(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV $0x1,%R12B | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV %RAX,-0x40(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
CMP $0x8,%EAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JB 46517c | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOV -0x40(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %EAX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
SHR $0x3,%ECX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 |
LEA -0x7(%RAX),%R15D | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV $0x1,%R12B | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
NOP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV -0x40(%RBP),%RBX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %EBX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
AND $-0x8,%EAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
CMP %EBX,%EAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JAE 4651bd | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
SUB %EAX,%EBX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
XOR %R15D,%R15D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV -0x38(%RBP),%R14 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
TEST $0x1,%R12B | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JNE 4651c7 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
JMP 465234 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
MOV -0x38(%RBP),%R14 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
TEST $0x1,%R12B | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 465234 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
CALL 4044c0 | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 |
VSUBSD (%R14),%XMM0,%XMM0 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVSD %XMM0,-0x30(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VADDSD 0x8(%R14),%XMM0,%XMM0 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVSD %XMM0,0x8(%R14) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
INCQ 0x10(%R14) | 3 | 0.25 | 0.25 | 0.83 | 0.83 | 1 | 0.25 | 0.25 | 0.33 | 5 | 1 |
LEA 0x68(%R14),%RDI | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA 0x50(%R14),%RBX | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RBX,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
CALL 465250 <_ZNSt3mapIN11qmcplusplus13StackKeyParamILi2EEEdSt4lessIS2_ESaISt4pairIKS2_dEEEixERS6_> | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 |
VMOVSD -0x30(%RBP),%XMM0 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VADDSD (%RAX),%XMM0,%XMM0 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVSD %XMM0,(%RAX) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
LEA 0x98(%R14),%RDI | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RBX,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
CALL 4653b0 <_ZNSt3mapIN11qmcplusplus13StackKeyParamILi2EEElSt4lessIS2_ESaISt4pairIKS2_lEEEixERS6_> | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 |
INCQ (%RAX) | 3 | 0.25 | 0.25 | 0.83 | 0.83 | 1 | 0.25 | 0.25 | 0.33 | 5 | 1 |
MOV 0x48(%R14),%RDI | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
TEST %RDI,%RDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 465234 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOV %R14,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
ADD $0x18,%RSP | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
POP %RBX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R12 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R13 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R14 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %RBP | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
JMP 46ac70 <_ZN11qmcplusplus12TimerManagerINS_9TimerTypeINS_8CPUClockEEEE9pop_timerEPS3_> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
ADD $0x18,%RSP | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
POP %RBX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R12 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R13 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R14 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %RBP | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
RET | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 | 0 | 1 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
Source file and lines | NewTimer.cpp:85-120 |
Module | exec |
nb instructions | 79 |
nb uops | 87 |
loop length | 271 |
used x86 registers | 11 |
used mmx registers | 0 |
used xmm registers | 1 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 3 |
micro-operation queue | 21.75 cycles |
front end | 21.75 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | |
---|---|---|---|---|---|---|---|---|
uops | 9.50 | 9.50 | 14.00 | 14.00 | 17.00 | 9.50 | 9.50 | 14.00 |
cycles | 9.50 | 9.50 | 14.00 | 14.00 | 17.00 | 9.50 | 9.50 | 14.00 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 20.48 |
Stall cycles | 0.00 |
Front-end | 21.75 |
Dispatch | 17.00 |
Overall L1 | 21.75 |
all | 0% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 0% |
all | 0% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | NA (no other vectorizable/vectorized instructions) |
all | 0% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
all | 9% |
load | 9% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 8% |
all | 12% |
load | 12% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | NA (no other vectorizable/vectorized instructions) |
all | 10% |
load | 11% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 10% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 8% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
PUSH %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
PUSH %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
PUSH %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
PUSH %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
PUSH %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
SUB $0x18,%RSP | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
CMPB $0,0x38(%RDI) | 1 | 0.25 | 0.25 | 0.50 | 0.50 | 0 | 0.25 | 0.25 | 0 | 1 | 0.50 |
JE 465234 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOV %RDI,%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
CALL 404600 | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 |
TEST %EAX,%EAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JLE 4651c7 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOV %R14,-0x38(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV $0x1,%R12B | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV %RAX,-0x40(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
CMP $0x8,%EAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JB 46517c | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOV -0x40(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %EAX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
SHR $0x3,%ECX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 |
LEA -0x7(%RAX),%R15D | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV $0x1,%R12B | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
NOP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV -0x40(%RBP),%RBX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %EBX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
AND $-0x8,%EAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
CMP %EBX,%EAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JAE 4651bd | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
SUB %EAX,%EBX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
XOR %R15D,%R15D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV -0x38(%RBP),%R14 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
TEST $0x1,%R12B | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JNE 4651c7 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
JMP 465234 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
MOV -0x38(%RBP),%R14 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
TEST $0x1,%R12B | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 465234 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
CALL 4044c0 | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 |
VSUBSD (%R14),%XMM0,%XMM0 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVSD %XMM0,-0x30(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VADDSD 0x8(%R14),%XMM0,%XMM0 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVSD %XMM0,0x8(%R14) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
INCQ 0x10(%R14) | 3 | 0.25 | 0.25 | 0.83 | 0.83 | 1 | 0.25 | 0.25 | 0.33 | 5 | 1 |
LEA 0x68(%R14),%RDI | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA 0x50(%R14),%RBX | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RBX,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
CALL 465250 <_ZNSt3mapIN11qmcplusplus13StackKeyParamILi2EEEdSt4lessIS2_ESaISt4pairIKS2_dEEEixERS6_> | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 |
VMOVSD -0x30(%RBP),%XMM0 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VADDSD (%RAX),%XMM0,%XMM0 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVSD %XMM0,(%RAX) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
LEA 0x98(%R14),%RDI | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RBX,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
CALL 4653b0 <_ZNSt3mapIN11qmcplusplus13StackKeyParamILi2EEElSt4lessIS2_ESaISt4pairIKS2_lEEEixERS6_> | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 |
INCQ (%RAX) | 3 | 0.25 | 0.25 | 0.83 | 0.83 | 1 | 0.25 | 0.25 | 0.33 | 5 | 1 |
MOV 0x48(%R14),%RDI | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
TEST %RDI,%RDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 465234 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOV %R14,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
ADD $0x18,%RSP | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
POP %RBX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R12 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R13 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R14 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %RBP | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
JMP 46ac70 <_ZN11qmcplusplus12TimerManagerINS_9TimerTypeINS_8CPUClockEEEE9pop_timerEPS3_> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
ADD $0x18,%RSP | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
POP %RBX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R12 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R13 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R14 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %RBP | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
RET | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 | 0 | 1 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼qmcplusplus::TimerType | 0.04 | 0.03 |
○Loop 1244 - NewTimer.cpp:99-100 - exec | 0.01 | 0 |
○Loop 1245 - NewTimer.cpp:86-100 - exec | 0 | 0 |