Function: miniqmcreference::OneBodyJastrowRef<qmcplusplus::BsplineFunctor<double> >::ratio(qmcpluspl ... | Module: exec | Source: OneBodyJastrowRef.h:130-159 [...] | Coverage: 0.03% |
---|
Function: miniqmcreference::OneBodyJastrowRef<qmcplusplus::BsplineFunctor<double> >::ratio(qmcpluspl ... | Module: exec | Source: OneBodyJastrowRef.h:130-159 [...] | Coverage: 0.03% |
---|
/home/kcamus/qaas_runs/169-451-1869/intel/miniqmc/build/miniqmc/src/Particle/ParticleSet.h: 302 - 305 |
-------------------------------------------------------------------------------- |
302: inline int first(int igroup) const { return SubPtcl[igroup]; } |
303: |
304: /// return the last index of a group i |
305: inline int last(int igroup) const { return SubPtcl[igroup + 1]; } |
/home/kcamus/qaas_runs/169-451-1869/intel/miniqmc/build/miniqmc/src/QMCWaveFunctions/Jastrow/BsplineFunctor.h: 166 - 181 |
-------------------------------------------------------------------------------- |
166: if (r >= cutoff_radius) |
167: return 0.0; |
168: r *= DeltaRInv; |
169: real_type ipart, t; |
170: t = std::modf(r, &ipart); |
171: int i = (int)ipart; |
[...] |
179: (SplineCoefs[i+0]*(A[ 0]*tp[0] + A[ 1]*tp[1] + A[ 2]*tp[2] + A[ 3]*tp[3])+ |
180: SplineCoefs[i+1]*(A[ 4]*tp[0] + A[ 5]*tp[1] + A[ 6]*tp[2] + A[ 7]*tp[3])+ |
181: SplineCoefs[i+2]*(A[ 8]*tp[0] + A[ 9]*tp[1] + A[10]*tp[2] + A[11]*tp[3])+ |
/home/kcamus/qaas_runs/169-451-1869/intel/miniqmc/build/miniqmc/src/Numerics/OhmmsPETE/OhmmsVector.h: 223 - 229 |
-------------------------------------------------------------------------------- |
223: return X[i]; |
[...] |
229: return X[i]; |
/home/kcamus/qaas_runs/169-451-1869/intel/miniqmc/build/miniqmc/src/QMCWaveFunctions/Jastrow/OneBodyJastrowRef.h: 130 - 159 |
-------------------------------------------------------------------------------- |
130: { |
131: UpdateMode = ORB_PBYP_RATIO; |
132: curAt = computeU(P.DistTables[myTableID]->Temp_r.data()); |
133: return std::exp(Vat[iat] - curAt); |
[...] |
145: if (NumGroups > 0) |
146: { |
147: for (int jg = 0; jg < NumGroups; ++jg) |
148: { |
149: if (F[jg] != nullptr) |
150: curVat += F[jg]->evaluateV(-1, Ions.first(jg), Ions.last(jg), dist, DistCompressed.data()); |
151: } |
152: } |
153: else |
154: { |
155: for (int c = 0; c < Nions; ++c) |
156: { |
157: int gid = Ions.GroupID[c]; |
158: if (F[gid] != nullptr) |
159: curVat += F[gid]->evaluate(dist[c]); |
/usr/lib64/gcc/x86_64-pc-linux-gnu/13.1.1/../../../../include/c++/13.1.1/bits/stl_vector.h: 1126 - 1258 |
-------------------------------------------------------------------------------- |
1126: return *(this->_M_impl._M_start + __n); |
[...] |
1258: { return _M_data_ptr(this->_M_impl._M_start); } |
0x418a30 PUSH %RBP |
0x418a31 MOV %RSP,%RBP |
0x418a34 PUSH %R15 |
0x418a36 PUSH %R14 |
0x418a38 PUSH %R13 |
0x418a3a PUSH %R12 |
0x418a3c PUSH %RBX |
0x418a3d SUB $0x18,%RSP |
0x418a41 MOV %EDX,-0x34(%RBP) |
0x418a44 MOV %RDI,%R13 |
0x418a47 MOVL $0,0xc(%RDI) |
0x418a4e MOVSXD 0x90(%RDI),%RAX |
0x418a55 MOV 0xa10(%RSI),%RCX |
0x418a5c MOV (%RCX,%RAX,8),%RAX |
0x418a60 MOV 0x68(%RAX),%R15 |
0x418a64 MOV 0x9c(%RDI),%EAX |
0x418a6a TEST %EAX,%EAX |
0x418a6c JLE 418ae6 |
0x418a6e VXORPD %XMM3,%XMM3,%XMM3 |
0x418a72 XOR %ECX,%ECX |
0x418a74 JMP 418ad1 |
0x418a76 NOPW %CS:(%RAX,%RAX,1) |
(198) 0x418a80 MOV 0xa0(%R13),%RAX |
(198) 0x418a87 MOV 0x140(%R13),%R9 |
(198) 0x418a8e MOV 0xa98(%RAX),%RAX |
(198) 0x418a95 LEA 0x1(%RCX),%RBX |
(198) 0x418a99 MOV (%RAX,%RCX,4),%EDX |
(198) 0x418a9c MOV 0x4(%RAX,%RCX,4),%ECX |
(198) 0x418aa0 MOV $-0x1,%ESI |
(198) 0x418aa5 MOV %R15,%R8 |
(198) 0x418aa8 VMOVSD %XMM3,-0x30(%RBP) |
(198) 0x418aad CALL 41b860 <_ZNK11qmcplusplus14BsplineFunctorIdE9evaluateVEiiiPKdPd> |
(198) 0x418ab2 VMOVSD -0x30(%RBP),%XMM3 |
(198) 0x418ab7 VADDSD %XMM3,%XMM0,%XMM3 |
(198) 0x418abb MOV 0x9c(%R13),%EAX |
(198) 0x418ac2 MOV %RBX,%RCX |
(198) 0x418ac5 MOVSXD %EAX,%RDX |
(198) 0x418ac8 CMP %RDX,%RCX |
(198) 0x418acb JGE 418c13 |
(198) 0x418ad1 MOV 0x1c0(%R13),%RDX |
(198) 0x418ad8 MOV (%RDX,%RCX,8),%RDI |
(198) 0x418adc TEST %RDI,%RDI |
(198) 0x418adf JNE 418a80 |
(198) 0x418ae1 INC %RCX |
(198) 0x418ae4 JMP 418ac5 |
0x418ae6 MOV 0x94(%R13),%EAX |
0x418aed VXORPD %XMM3,%XMM3,%XMM3 |
0x418af1 TEST %EAX,%EAX |
0x418af3 JLE 418c13 |
0x418af9 XOR %EBX,%EBX |
0x418afb LEA -0x40(%RBP),%R12 |
0x418aff JMP 418b23 |
0x418b01 NOPW %CS:(%RAX,%RAX,1) |
(197) 0x418b10 VADDSD %XMM3,%XMM1,%XMM3 |
(197) 0x418b14 INC %RBX |
(197) 0x418b17 MOVSXD %EAX,%RCX |
(197) 0x418b1a CMP %RCX,%RBX |
(197) 0x418b1d JGE 418c13 |
(197) 0x418b23 MOV 0xa0(%R13),%RCX |
(197) 0x418b2a MOV 0x1c0(%R13),%RDX |
(197) 0x418b31 MOV 0x5c0(%RCX),%RCX |
(197) 0x418b38 MOVSXD (%RCX,%RBX,4),%RCX |
(197) 0x418b3c MOV (%RDX,%RCX,8),%R14 |
(197) 0x418b40 TEST %R14,%R14 |
(197) 0x418b43 JE 418b14 |
(197) 0x418b45 VMOVSD (%R15,%RBX,8),%XMM0 |
(197) 0x418b4b VMOVSD 0x8(%R14),%XMM2 |
(197) 0x418b51 VXORPD %XMM1,%XMM1,%XMM1 |
(197) 0x418b55 VUCOMISD %XMM0,%XMM2 |
(197) 0x418b59 JBE 418b10 |
(197) 0x418b5b VMULSD 0x238(%R14),%XMM0,%XMM0 |
(197) 0x418b64 MOV %R12,%RDI |
(197) 0x418b67 VMOVSD %XMM3,-0x30(%RBP) |
(197) 0x418b6c CALL 470440 <modf> |
(197) 0x418b71 VCVTTSD2SI -0x40(%RBP),%EAX |
(197) 0x418b76 CLTQ |
(197) 0x418b78 MOV 0x218(%R14),%RCX |
(197) 0x418b7f VMOVSD 0x20(%R14),%XMM1 |
(197) 0x418b85 VFMADD231SD 0x18(%R14),%XMM0,%XMM1 |
(197) 0x418b8b VFMADD213SD 0x28(%R14),%XMM0,%XMM1 |
(197) 0x418b91 VFMADD213SD 0x30(%R14),%XMM0,%XMM1 |
(197) 0x418b97 VMULSD (%RCX,%RAX,8),%XMM1,%XMM1 |
(197) 0x418b9c VMOVSD 0x40(%R14),%XMM2 |
(197) 0x418ba2 VFMADD231SD 0x38(%R14),%XMM0,%XMM2 |
(197) 0x418ba8 VFMADD213SD 0x48(%R14),%XMM0,%XMM2 |
(197) 0x418bae VFMADD213SD 0x50(%R14),%XMM0,%XMM2 |
(197) 0x418bb4 VFMADD132SD 0x8(%RCX,%RAX,8),%XMM1,%XMM2 |
(197) 0x418bbb VMOVSD 0x60(%R14),%XMM3 |
(197) 0x418bc1 VFMADD231SD 0x58(%R14),%XMM0,%XMM3 |
(197) 0x418bc7 VFMADD213SD 0x68(%R14),%XMM0,%XMM3 |
(197) 0x418bcd VFMADD213SD 0x70(%R14),%XMM0,%XMM3 |
(197) 0x418bd3 VFMADD132SD 0x10(%RCX,%RAX,8),%XMM2,%XMM3 |
(197) 0x418bda VMOVSD 0x80(%R14),%XMM1 |
(197) 0x418be3 VFMADD231SD 0x78(%R14),%XMM0,%XMM1 |
(197) 0x418be9 VFMADD213SD 0x88(%R14),%XMM0,%XMM1 |
(197) 0x418bf2 VFMADD213SD 0x90(%R14),%XMM0,%XMM1 |
(197) 0x418bfb VFMADD132SD 0x18(%RCX,%RAX,8),%XMM3,%XMM1 |
(197) 0x418c02 VMOVSD -0x30(%RBP),%XMM3 |
(197) 0x418c07 MOV 0x94(%R13),%EAX |
(197) 0x418c0e JMP 418b10 |
0x418c13 VMOVSD %XMM3,0xa8(%R13) |
0x418c1c MOVSXD -0x34(%RBP),%RAX |
0x418c20 MOV 0xe8(%R13),%RCX |
0x418c27 VMOVSD (%RCX,%RAX,8),%XMM0 |
0x418c2c VSUBSD %XMM3,%XMM0,%XMM0 |
0x418c30 ADD $0x18,%RSP |
0x418c34 POP %RBX |
0x418c35 POP %R12 |
0x418c37 POP %R13 |
0x418c39 POP %R14 |
0x418c3b POP %R15 |
0x418c3d POP %RBP |
0x418c3e JMP 4703c0 |
0x418c43 NOPW %CS:(%RAX,%RAX,1) |
0x418c4d NOPL (%RAX) |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►66.67+ | qmcplusplus::WaveFunction::rat[...] | WaveFunction.cpp:219 | exec |
○ | main.extracted.104 | stl_vector.h:1126 | exec |
○ | __kmp_invoke_microtask | libiomp5.so | |
○ | __kmp_fork_call | libiomp5.so | |
○ | __kmpc_fork_call | libiomp5.so | |
○ | main | miniqmc.cpp:404 | exec |
○ | __libc_init_first | libc.so.6 | |
►33.33+ | main.extracted.104 | stl_vector.h:1126 | exec |
○ | __kmp_invoke_microtask | libiomp5.so | |
○ | __kmp_fork_call | libiomp5.so | |
○ | __kmpc_fork_call | libiomp5.so | |
○ | main | miniqmc.cpp:404 | exec |
○ | __libc_init_first | libc.so.6 |
Path / |
Source file and lines | OneBodyJastrowRef.h:130-159 |
Module | exec |
nb instructions | 45 |
nb uops | 45 |
loop length | 183 |
used x86 registers | 12 |
used mmx registers | 0 |
used xmm registers | 2 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 2 |
micro-operation queue | 11.25 cycles |
front end | 11.25 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | |
---|---|---|---|---|---|---|---|---|
uops | 2.75 | 2.75 | 8.17 | 7.83 | 9.00 | 2.50 | 3.00 | 8.00 |
cycles | 2.75 | 2.75 | 8.17 | 7.83 | 9.00 | 2.50 | 3.00 | 8.00 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 11.43 |
Stall cycles | 0.00 |
Front-end | 11.25 |
Dispatch | 9.00 |
Overall L1 | 11.25 |
all | 0% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 0% |
all | 40% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 100% |
all | 18% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 40% |
all | 7% |
load | 12% |
store | 6% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 6% |
all | 17% |
load | 12% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 25% |
all | 11% |
load | 12% |
store | 8% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 13% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
PUSH %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
PUSH %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
PUSH %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
PUSH %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
PUSH %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
SUB $0x18,%RSP | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV %EDX,-0x34(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %RDI,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOVL $0,0xc(%RDI) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 2 | 1 |
MOVSXD 0x90(%RDI),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0xa10(%RSI),%RCX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV (%RCX,%RAX,8),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x68(%RAX),%R15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x9c(%RDI),%EAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
TEST %EAX,%EAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JLE 418ae6 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
VXORPD %XMM3,%XMM3,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
XOR %ECX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
JMP 418ad1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV 0x94(%R13),%EAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VXORPD %XMM3,%XMM3,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
TEST %EAX,%EAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JLE 418c13 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
XOR %EBX,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
LEA -0x40(%RBP),%R12 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
JMP 418b23 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMOVSD %XMM3,0xa8(%R13) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOVSXD -0x34(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0xe8(%R13),%RCX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD (%RCX,%RAX,8),%XMM0 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VSUBSD %XMM3,%XMM0,%XMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
ADD $0x18,%RSP | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
POP %RBX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R12 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R13 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R14 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %RBP | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
JMP 4703c0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
Source file and lines | OneBodyJastrowRef.h:130-159 |
Module | exec |
nb instructions | 45 |
nb uops | 45 |
loop length | 183 |
used x86 registers | 12 |
used mmx registers | 0 |
used xmm registers | 2 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 2 |
micro-operation queue | 11.25 cycles |
front end | 11.25 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | |
---|---|---|---|---|---|---|---|---|
uops | 2.75 | 2.75 | 8.17 | 7.83 | 9.00 | 2.50 | 3.00 | 8.00 |
cycles | 2.75 | 2.75 | 8.17 | 7.83 | 9.00 | 2.50 | 3.00 | 8.00 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 11.43 |
Stall cycles | 0.00 |
Front-end | 11.25 |
Dispatch | 9.00 |
Overall L1 | 11.25 |
all | 0% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 0% |
all | 40% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 100% |
all | 18% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 40% |
all | 7% |
load | 12% |
store | 6% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 6% |
all | 17% |
load | 12% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 25% |
all | 11% |
load | 12% |
store | 8% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 13% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
PUSH %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
PUSH %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
PUSH %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
PUSH %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
PUSH %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
SUB $0x18,%RSP | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV %EDX,-0x34(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %RDI,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOVL $0,0xc(%RDI) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 2 | 1 |
MOVSXD 0x90(%RDI),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0xa10(%RSI),%RCX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV (%RCX,%RAX,8),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x68(%RAX),%R15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x9c(%RDI),%EAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
TEST %EAX,%EAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JLE 418ae6 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
VXORPD %XMM3,%XMM3,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
XOR %ECX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
JMP 418ad1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV 0x94(%R13),%EAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VXORPD %XMM3,%XMM3,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
TEST %EAX,%EAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JLE 418c13 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
XOR %EBX,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
LEA -0x40(%RBP),%R12 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
JMP 418b23 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMOVSD %XMM3,0xa8(%R13) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOVSXD -0x34(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0xe8(%R13),%RCX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD (%RCX,%RAX,8),%XMM0 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VSUBSD %XMM3,%XMM0,%XMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
ADD $0x18,%RSP | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
POP %RBX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R12 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R13 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R14 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %RBP | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
JMP 4703c0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼miniqmcreference::OneBodyJastrowRef | 0.03 | 0.02 |
○Loop 197 - OneBodyJastrowRef.h:155-159 - exec | 0 | 0 |
○Loop 198 - OneBodyJastrowRef.h:147-150 - exec | 0 | 0 |