Function: _ZN16miniqmcreference17OneBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE14evaluateRati ... | Module: exec | Source: OneBodyJastrowRef.h:133-155 [...] | Coverage: 0.01% |
---|
Function: _ZN16miniqmcreference17OneBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE14evaluateRati ... | Module: exec | Source: OneBodyJastrowRef.h:133-155 [...] | Coverage: 0.01% |
---|
/beegfs/hackathon/users/eoseret/qaas_runs/170-855-3059/intel/miniqmc/build/miniqmc/src/Particle/ParticleSet.h: 313 - 316 |
-------------------------------------------------------------------------------- |
313: inline int first(int igroup) const { return (*group_offsets_)[igroup]; } |
314: |
315: ///return the last index of a group i |
316: inline int last(int igroup) const { return (*group_offsets_)[igroup + 1]; } |
/beegfs/hackathon/users/eoseret/qaas_runs/170-855-3059/intel/miniqmc/build/miniqmc/src/QMCWaveFunctions/Jastrow/OneBodyJastrowRef.h: 133 - 155 |
-------------------------------------------------------------------------------- |
133: { |
134: for (int k = 0; k < ratios.size(); ++k) |
135: ratios[k] = std::exp(Vat[VP.refPtcl] - computeU(VP.getDistTableAB(myTableID).getDistRow(k).data())); |
136: } |
137: |
138: inline valT computeU(const valT* dist) |
139: { |
140: valT curVat(0); |
141: if (NumGroups > 0) |
142: { |
143: for (int jg = 0; jg < NumGroups; ++jg) |
144: { |
145: if (F[jg] != nullptr) |
146: curVat += F[jg]->evaluateV(-1, Ions.first(jg), Ions.last(jg), dist, DistCompressed.data()); |
147: } |
148: } |
149: else |
150: { |
151: for (int c = 0; c < Nions; ++c) |
152: { |
153: int gid = Ions.GroupID[c]; |
154: if (F[gid] != nullptr) |
155: curVat += F[gid]->evaluate(dist[c]); |
/beegfs/hackathon/users/eoseret/qaas_runs/170-855-3059/intel/miniqmc/build/miniqmc/src/Numerics/OhmmsPETE/OhmmsVector.h: 223 - 249 |
-------------------------------------------------------------------------------- |
223: return X[i]; |
[...] |
229: return X[i]; |
[...] |
249: inline const_pointer data() const { return X; } |
/usr/lib/gcc/x86_64-redhat-linux/11/../../../../include/c++/11/bits/shared_ptr_base.h: 1296 - 1296 |
-------------------------------------------------------------------------------- |
1296: { return _M_ptr; } |
/usr/lib/gcc/x86_64-redhat-linux/11/../../../../include/c++/11/bits/stl_vector.h: 919 - 1169 |
-------------------------------------------------------------------------------- |
919: { return size_type(this->_M_impl._M_finish - this->_M_impl._M_start); } |
[...] |
1046: return *(this->_M_impl._M_start + __n); |
[...] |
1064: return *(this->_M_impl._M_start + __n); |
[...] |
1169: { return _M_data_ptr(this->_M_impl._M_start); } |
/beegfs/hackathon/users/eoseret/qaas_runs/170-855-3059/intel/miniqmc/build/miniqmc/src/QMCWaveFunctions/Jastrow/BsplineFunctor.h: 166 - 182 |
-------------------------------------------------------------------------------- |
166: if (r >= cutoff_radius) |
167: return 0.0; |
168: r *= DeltaRInv; |
169: real_type ipart, t; |
170: t = std::modf(r, &ipart); |
171: int i = (int)ipart; |
172: real_type tp[4]; |
173: tp[0] = t * t * t; |
[...] |
179: (SplineCoefs[i+0]*(A[ 0]*tp[0] + A[ 1]*tp[1] + A[ 2]*tp[2] + A[ 3]*tp[3])+ |
180: SplineCoefs[i+1]*(A[ 4]*tp[0] + A[ 5]*tp[1] + A[ 6]*tp[2] + A[ 7]*tp[3])+ |
181: SplineCoefs[i+2]*(A[ 8]*tp[0] + A[ 9]*tp[1] + A[10]*tp[2] + A[11]*tp[3])+ |
182: SplineCoefs[i+3]*(A[12]*tp[0] + A[13]*tp[1] + A[14]*tp[2] + A[15]*tp[3])); |
0x422070 PUSH %RBP |
0x422071 MOV %RSP,%RBP |
0x422074 PUSH %R15 |
0x422076 PUSH %R14 |
0x422078 PUSH %R13 |
0x42207a PUSH %R12 |
0x42207c PUSH %RBX |
0x42207d SUB $0x48,%RSP |
0x422081 MOV %RSI,-0x38(%RBP) |
0x422085 MOV 0x8(%RDX),%RAX |
0x422089 MOV %RDX,-0x40(%RBP) |
0x42208d CMP (%RDX),%RAX |
0x422090 JE 422354 |
0x422096 MOV %RDI,%R15 |
0x422099 XOR %EBX,%EBX |
0x42209b JMP 4220e5 |
(300) 0x42209d XORPD %XMM3,%XMM3 |
(300) 0x4220a1 NOPW %CS:(%RAX,%RAX,1) |
(300) 0x4220b0 MOVSD -0x48(%RBP),%XMM0 |
(300) 0x4220b5 SUBSD %XMM3,%XMM0 |
(300) 0x4220b9 CALL 4f1fd0 <exp> |
(300) 0x4220be MOV -0x40(%RBP),%RCX |
(300) 0x4220c2 MOV (%RCX),%RAX |
(300) 0x4220c5 MOV -0x50(%RBP),%RBX |
(300) 0x4220c9 MOVSD %XMM0,(%RAX,%RBX,8) |
(300) 0x4220ce INC %RBX |
(300) 0x4220d1 MOV 0x8(%RCX),%RCX |
(300) 0x4220d5 SUB %RAX,%RCX |
(300) 0x4220d8 SAR $0x3,%RCX |
(300) 0x4220dc CMP %RBX,%RCX |
(300) 0x4220df JBE 422354 |
(300) 0x4220e5 MOV -0x38(%RBP),%RDI |
(300) 0x4220e9 MOVSXD 0x2a0(%RDI),%RAX |
(300) 0x4220f0 MOV 0xf0(%R15),%RCX |
(300) 0x4220f7 MOVSD (%RCX,%RAX,8),%XMM0 |
(300) 0x4220fc MOVSD %XMM0,-0x48(%RBP) |
(300) 0x422101 MOV 0xa8(%R15),%ESI |
(300) 0x422108 CALL 462dd0 <_ZNK11qmcplusplus11ParticleSet14getDistTableABEi> |
(300) 0x42210d MOV 0x48(%RAX),%RAX |
(300) 0x422111 LEA (%RBX,%RBX,4),%RCX |
(300) 0x422115 MOV 0x18(%RAX,%RCX,8),%R14 |
(300) 0x42211a MOV 0x98(%R15),%EAX |
(300) 0x422121 TEST %EAX,%EAX |
(300) 0x422123 MOV %RBX,-0x50(%RBP) |
(300) 0x422127 JLE 4221b0 |
(300) 0x42212d XORPD %XMM3,%XMM3 |
(300) 0x422131 XOR %EBX,%EBX |
(300) 0x422133 JMP 42214f |
0x422135 NOPW %CS:(%RAX,%RAX,1) |
(302) 0x422140 MOVSXD %EAX,%RCX |
(302) 0x422143 INC %RBX |
(302) 0x422146 CMP %RCX,%RBX |
(302) 0x422149 JGE 4220b0 |
(302) 0x42214f MOV 0x1c8(%R15),%RCX |
(302) 0x422156 MOV (%RCX,%RBX,8),%RDI |
(302) 0x42215a TEST %RDI,%RDI |
(302) 0x42215d JE 422140 |
(302) 0x42215f MOV 0xa0(%R15),%RAX |
(302) 0x422166 MOV 0x148(%R15),%R9 |
(302) 0x42216d MOV 0x268(%RAX),%RAX |
(302) 0x422174 MOV 0x18(%RAX),%RAX |
(302) 0x422178 MOV (%RAX,%RBX,4),%EDX |
(302) 0x42217b MOV 0x4(%RAX,%RBX,4),%ECX |
(302) 0x42217f MOV $-0x1,%ESI |
(302) 0x422184 MOV %R14,%R8 |
(302) 0x422187 MOVSD %XMM3,-0x30(%RBP) |
(302) 0x42218c CALL 424900 <_ZNK11qmcplusplus14BsplineFunctorIdE9evaluateVEiiiPKdPd> |
(302) 0x422191 MOVSD -0x30(%RBP),%XMM3 |
(302) 0x422196 ADDSD %XMM0,%XMM3 |
(302) 0x42219a MOV 0x98(%R15),%EAX |
(302) 0x4221a1 JMP 422140 |
0x4221a3 NOPW %CS:(%RAX,%RAX,1) |
(300) 0x4221b0 MOV 0x90(%R15),%R13D |
(300) 0x4221b7 TEST %R13D,%R13D |
(300) 0x4221ba JLE 42209d |
(300) 0x4221c0 MOV 0xa0(%R15),%RAX |
(300) 0x4221c7 MOV 0x1c8(%R15),%RCX |
(300) 0x4221ce MOV 0x18(%RAX),%RDX |
(300) 0x4221d2 XORPD %XMM3,%XMM3 |
(300) 0x4221d6 XOR %EBX,%EBX |
(300) 0x4221d8 MOV %RCX,-0x60(%RBP) |
(300) 0x4221dc MOV %R14,-0x58(%RBP) |
(300) 0x4221e0 JMP 422200 |
0x4221e2 NOPW %CS:(%RAX,%RAX,1) |
(301) 0x4221f0 ADDSD %XMM1,%XMM3 |
(301) 0x4221f4 INC %RBX |
(301) 0x4221f7 CMP %RBX,%R13 |
(301) 0x4221fa JE 4220b0 |
(301) 0x422200 MOVSXD (%RDX,%RBX,4),%RAX |
(301) 0x422204 MOV (%RCX,%RAX,8),%R12 |
(301) 0x422208 TEST %R12,%R12 |
(301) 0x42220b JE 4221f4 |
(301) 0x42220d MOVSD (%R14,%RBX,8),%XMM0 |
(301) 0x422213 MOVSD 0x8(%R12),%XMM2 |
(301) 0x42221a XORPD %XMM1,%XMM1 |
(301) 0x42221e UCOMISD %XMM0,%XMM2 |
(301) 0x422222 JBE 4221f0 |
(301) 0x422224 MULSD 0x238(%R12),%XMM0 |
(301) 0x42222e LEA -0x68(%RBP),%RDI |
(301) 0x422232 MOVSD %XMM3,-0x30(%RBP) |
(301) 0x422237 MOV %RDX,%R14 |
(301) 0x42223a CALL 4f2050 <modf> |
(301) 0x42223f MOV %R14,%RDX |
(301) 0x422242 MOV -0x58(%RBP),%R14 |
(301) 0x422246 CVTTSD2SI -0x68(%RBP),%EAX |
(301) 0x42224b CLTQ |
(301) 0x42224d MOV 0x218(%R12),%RCX |
(301) 0x422255 MOVAPD %XMM0,%XMM4 |
(301) 0x422259 MULSD %XMM0,%XMM4 |
(301) 0x42225d MOVAPD %XMM4,%XMM1 |
(301) 0x422261 MULSD %XMM0,%XMM1 |
(301) 0x422265 MOVSD 0x18(%R12),%XMM5 |
(301) 0x42226c MOVSD 0x20(%R12),%XMM6 |
(301) 0x422273 MOVHPD 0x38(%R12),%XMM6 |
(301) 0x42227a MOVSD 0x28(%R12),%XMM3 |
(301) 0x422281 MOVSD 0x60(%R12),%XMM7 |
(301) 0x422288 MOVHPD 0x78(%R12),%XMM7 |
(301) 0x42228f MOVSD 0x30(%R12),%XMM2 |
(301) 0x422296 MOVAPD %XMM4,%XMM8 |
(301) 0x42229b UNPCKLPD %XMM1,%XMM8 |
(301) 0x4222a0 MULPD %XMM8,%XMM7 |
(301) 0x4222a5 MULPD %XMM6,%XMM8 |
(301) 0x4222aa MOVHPD 0x40(%R12),%XMM5 |
(301) 0x4222b1 MOVSD 0x58(%R12),%XMM6 |
(301) 0x4222b8 MOVHPD 0x80(%R12),%XMM6 |
(301) 0x4222c2 UNPCKLPD %XMM4,%XMM1 |
(301) 0x4222c6 MULPD %XMM1,%XMM6 |
(301) 0x4222ca ADDPD %XMM7,%XMM6 |
(301) 0x4222ce MULPD %XMM5,%XMM1 |
(301) 0x4222d2 ADDPD %XMM8,%XMM1 |
(301) 0x4222d7 MOVSD 0x68(%R12),%XMM4 |
(301) 0x4222de MOVHPD 0x88(%R12),%XMM4 |
(301) 0x4222e8 MOVHPD 0x48(%R12),%XMM3 |
(301) 0x4222ef UNPCKLPD %XMM0,%XMM0 |
(301) 0x4222f3 MULPD %XMM0,%XMM3 |
(301) 0x4222f7 MULPD %XMM4,%XMM0 |
(301) 0x4222fb MOVSD 0x70(%R12),%XMM4 |
(301) 0x422302 MOVHPD 0x90(%R12),%XMM4 |
(301) 0x42230c ADDPD %XMM0,%XMM4 |
(301) 0x422310 ADDPD %XMM6,%XMM4 |
(301) 0x422314 MOVHPD 0x50(%R12),%XMM2 |
(301) 0x42231b ADDPD %XMM3,%XMM2 |
(301) 0x42231f MOVSD -0x30(%RBP),%XMM3 |
(301) 0x422324 ADDPD %XMM1,%XMM2 |
(301) 0x422328 MOVUPD (%RCX,%RAX,8),%XMM0 |
(301) 0x42232d MULPD %XMM2,%XMM0 |
(301) 0x422331 MOVUPD 0x10(%RCX,%RAX,8),%XMM2 |
(301) 0x422337 MOV -0x60(%RBP),%RCX |
(301) 0x42233b MULPD %XMM4,%XMM2 |
(301) 0x42233f ADDPD %XMM0,%XMM2 |
(301) 0x422343 MOVAPD %XMM2,%XMM1 |
(301) 0x422347 UNPCKHPD %XMM2,%XMM1 |
(301) 0x42234b ADDSD %XMM2,%XMM1 |
(301) 0x42234f JMP 4221f0 |
0x422354 ADD $0x48,%RSP |
0x422358 POP %RBX |
0x422359 POP %R12 |
0x42235b POP %R13 |
0x42235d POP %R14 |
0x42235f POP %R15 |
0x422361 POP %RBP |
0x422362 RET |
0x422363 NOPW %CS:(%RAX,%RAX,1) |
0x42236d NOPL (%RAX) |
Path / |
Source file and lines | OneBodyJastrowRef.h:133-155 |
Module | exec |
nb instructions | 29 |
nb uops | 24 |
loop length | 111 |
used x86 registers | 11 |
used mmx registers | 0 |
used xmm registers | 0 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 2 |
micro-operation queue | 4.00 cycles |
front end | 4.00 cycles |
ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 1.50 | 1.00 | 1.00 | 1.00 | 1.50 | 1.33 | 1.33 | 1.33 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 |
cycles | 1.50 | 1.00 | 1.00 | 1.00 | 1.50 | 1.33 | 1.33 | 1.33 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 |
Cycles executing div or sqrt instructions | NA |
Front-end | 4.00 |
Dispatch | 1.50 |
Overall L1 | 4.00 |
all | 0% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
all | 11% |
load | 12% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 10% |
Instruction | Nb FU | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
SUB $0x48,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %RSI,-0x38(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV 0x8(%RDX),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %RDX,-0x40(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
CMP (%RDX),%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
JE 422354 <_ZN16miniqmcreference17OneBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE14evaluateRatiosERNS1_18VirtualParticleSetERSt6vectorIdSaIdEE+0x2e4> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV %RDI,%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %EBX,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
JMP 4220e5 <_ZN16miniqmcreference17OneBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE14evaluateRatiosERNS1_18VirtualParticleSetERSt6vectorIdSaIdEE+0x75> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
ADD $0x48,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
POP %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
RET | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPL (%RAX) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
Source file and lines | OneBodyJastrowRef.h:133-155 |
Module | exec |
nb instructions | 29 |
nb uops | 24 |
loop length | 111 |
used x86 registers | 11 |
used mmx registers | 0 |
used xmm registers | 0 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 2 |
micro-operation queue | 4.00 cycles |
front end | 4.00 cycles |
ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 1.50 | 1.00 | 1.00 | 1.00 | 1.50 | 1.33 | 1.33 | 1.33 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 |
cycles | 1.50 | 1.00 | 1.00 | 1.00 | 1.50 | 1.33 | 1.33 | 1.33 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 |
Cycles executing div or sqrt instructions | NA |
Front-end | 4.00 |
Dispatch | 1.50 |
Overall L1 | 4.00 |
all | 0% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
all | 11% |
load | 12% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 10% |
Instruction | Nb FU | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
SUB $0x48,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %RSI,-0x38(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV 0x8(%RDX),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %RDX,-0x40(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
CMP (%RDX),%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
JE 422354 <_ZN16miniqmcreference17OneBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE14evaluateRatiosERNS1_18VirtualParticleSetERSt6vectorIdSaIdEE+0x2e4> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV %RDI,%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %EBX,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
JMP 4220e5 <_ZN16miniqmcreference17OneBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE14evaluateRatiosERNS1_18VirtualParticleSetERSt6vectorIdSaIdEE+0x75> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
ADD $0x48,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
POP %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
RET | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPL (%RAX) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼_ZN16miniqmcreference17OneBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE14evaluateRatiosERNS1_18VirtualParticleSetERSt6vectorIdSaIdEE– | 0.01 | 0.03 |
▼Loop 300 - OneBodyJastrowRef.h:134-155 - exec– | 0.01 | 0.02 |
○Loop 301 - BsplineFunctor.h:166-182 - exec | 0 | 0 |
○Loop 302 - OneBodyJastrowRef.h:143-146 - exec | 0 | 0.01 |