Function: miniqmcreference::OneBodyJastrowRef<qmcplusplus::BsplineFunctor<double> >::evaluateRatios( ... | Module: libqmcwfs.so | Source: OneBodyJastrowRef.h:133-155 [...] | Coverage: 0.01% |
---|
Function: miniqmcreference::OneBodyJastrowRef<qmcplusplus::BsplineFunctor<double> >::evaluateRatios( ... | Module: libqmcwfs.so | Source: OneBodyJastrowRef.h:133-155 [...] | Coverage: 0.01% |
---|
/usr/lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/shared_ptr_base.h: 1296 - 1296 |
-------------------------------------------------------------------------------- |
1296: { return _M_ptr; } |
/home/kcamus/qaas_runs/170-254-9426/intel/miniqmc/build/miniqmc/src/QMCWaveFunctions/Jastrow/BsplineFunctor.h: 166 - 182 |
-------------------------------------------------------------------------------- |
166: if (r >= cutoff_radius) |
167: return 0.0; |
168: r *= DeltaRInv; |
169: real_type ipart, t; |
170: t = std::modf(r, &ipart); |
171: int i = (int)ipart; |
172: real_type tp[4]; |
173: tp[0] = t * t * t; |
[...] |
179: (SplineCoefs[i+0]*(A[ 0]*tp[0] + A[ 1]*tp[1] + A[ 2]*tp[2] + A[ 3]*tp[3])+ |
180: SplineCoefs[i+1]*(A[ 4]*tp[0] + A[ 5]*tp[1] + A[ 6]*tp[2] + A[ 7]*tp[3])+ |
181: SplineCoefs[i+2]*(A[ 8]*tp[0] + A[ 9]*tp[1] + A[10]*tp[2] + A[11]*tp[3])+ |
182: SplineCoefs[i+3]*(A[12]*tp[0] + A[13]*tp[1] + A[14]*tp[2] + A[15]*tp[3])); |
/home/kcamus/qaas_runs/170-254-9426/intel/miniqmc/build/miniqmc/src/Particle/ParticleSet.h: 313 - 316 |
-------------------------------------------------------------------------------- |
313: inline int first(int igroup) const { return (*group_offsets_)[igroup]; } |
314: |
315: ///return the last index of a group i |
316: inline int last(int igroup) const { return (*group_offsets_)[igroup + 1]; } |
/usr/lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/stl_vector.h: 919 - 1169 |
-------------------------------------------------------------------------------- |
919: { return size_type(this->_M_impl._M_finish - this->_M_impl._M_start); } |
[...] |
1046: return *(this->_M_impl._M_start + __n); |
[...] |
1064: return *(this->_M_impl._M_start + __n); |
[...] |
1169: { return _M_data_ptr(this->_M_impl._M_start); } |
/home/kcamus/qaas_runs/170-254-9426/intel/miniqmc/build/miniqmc/src/Numerics/OhmmsPETE/OhmmsVector.h: 223 - 249 |
-------------------------------------------------------------------------------- |
223: return X[i]; |
[...] |
229: return X[i]; |
[...] |
249: inline const_pointer data() const { return X; } |
/home/kcamus/qaas_runs/170-254-9426/intel/miniqmc/build/miniqmc/src/QMCWaveFunctions/Jastrow/OneBodyJastrowRef.h: 133 - 155 |
-------------------------------------------------------------------------------- |
133: { |
134: for (int k = 0; k < ratios.size(); ++k) |
135: ratios[k] = std::exp(Vat[VP.refPtcl] - computeU(VP.getDistTableAB(myTableID).getDistRow(k).data())); |
136: } |
137: |
138: inline valT computeU(const valT* dist) |
139: { |
140: valT curVat(0); |
141: if (NumGroups > 0) |
142: { |
143: for (int jg = 0; jg < NumGroups; ++jg) |
144: { |
145: if (F[jg] != nullptr) |
146: curVat += F[jg]->evaluateV(-1, Ions.first(jg), Ions.last(jg), dist, DistCompressed.data()); |
147: } |
148: } |
149: else |
150: { |
151: for (int c = 0; c < Nions; ++c) |
152: { |
153: int gid = Ions.GroupID[c]; |
154: if (F[gid] != nullptr) |
155: curVat += F[gid]->evaluate(dist[c]); |
0x3e390 PUSH %RBP |
0x3e391 MOV %RSP,%RBP |
0x3e394 PUSH %R15 |
0x3e396 PUSH %R14 |
0x3e398 PUSH %R13 |
0x3e39a PUSH %R12 |
0x3e39c PUSH %RBX |
0x3e39d SUB $0x28,%RSP |
0x3e3a1 MOV 0x8(%RDX),%RAX |
0x3e3a5 MOV %RSI,-0x38(%RBP) |
0x3e3a9 MOV %RDX,-0x40(%RBP) |
0x3e3ad CMP (%RDX),%RAX |
0x3e3b0 JE 3e5f0 |
0x3e3b6 MOV %RDI,%R15 |
0x3e3b9 XOR %R12D,%R12D |
0x3e3bc JMP 3e3f2 |
0x3e3be XCHG %AX,%AX |
(731) 0x3e3c0 VMOVSD -0x48(%RBP),%XMM0 |
(731) 0x3e3c5 VSUBSD %XMM3,%XMM0,%XMM0 |
(731) 0x3e3c9 CALL 4ecf0 <@plt_start@+0x640> |
(731) 0x3e3ce MOV -0x40(%RBP),%RCX |
(731) 0x3e3d2 MOV (%RCX),%RAX |
(731) 0x3e3d5 VMOVSD %XMM0,(%RAX,%R12,8) |
(731) 0x3e3db INC %R12 |
(731) 0x3e3de MOV 0x8(%RCX),%RCX |
(731) 0x3e3e2 SUB %RAX,%RCX |
(731) 0x3e3e5 SAR $0x3,%RCX |
(731) 0x3e3e9 CMP %R12,%RCX |
(731) 0x3e3ec JBE 3e5f0 |
(731) 0x3e3f2 MOV -0x38(%RBP),%RDI |
(731) 0x3e3f6 MOV 0xf0(%R15),%RCX |
(731) 0x3e3fd MOV 0xa8(%R15),%ESI |
(731) 0x3e404 MOVSXD 0x2a0(%RDI),%RAX |
(731) 0x3e40b VMOVSD (%RCX,%RAX,8),%XMM0 |
(731) 0x3e410 VMOVSD %XMM0,-0x48(%RBP) |
(731) 0x3e415 CALL 4ea70 <@plt_start@+0x3c0> |
(731) 0x3e41a MOV 0x48(%RAX),%RAX |
(731) 0x3e41e LEA (%R12,%R12,4),%RCX |
(731) 0x3e422 MOV 0x18(%RAX,%RCX,8),%R13 |
(731) 0x3e427 MOV 0x98(%R15),%EAX |
(731) 0x3e42e TEST %EAX,%EAX |
(731) 0x3e430 JLE 3e4b0 |
(731) 0x3e432 VXORPD %XMM3,%XMM3,%XMM3 |
(731) 0x3e436 XOR %EBX,%EBX |
(731) 0x3e438 JMP 3e44f |
0x3e43a NOPW (%RAX,%RAX,1) |
(733) 0x3e440 MOVSXD %EAX,%RCX |
(733) 0x3e443 INC %RBX |
(733) 0x3e446 CMP %RCX,%RBX |
(733) 0x3e449 JGE 3e3c0 |
(733) 0x3e44f MOV 0x1c8(%R15),%RCX |
(733) 0x3e456 MOV (%RCX,%RBX,8),%RDI |
(733) 0x3e45a TEST %RDI,%RDI |
(733) 0x3e45d JE 3e440 |
(733) 0x3e45f MOV 0xa0(%R15),%RAX |
(733) 0x3e466 MOV 0x148(%R15),%R9 |
(733) 0x3e46d MOV $-0x1,%ESI |
(733) 0x3e472 MOV %R13,%R8 |
(733) 0x3e475 VMOVSD %XMM3,-0x30(%RBP) |
(733) 0x3e47a MOV 0x268(%RAX),%RAX |
(733) 0x3e481 MOV 0x18(%RAX),%RAX |
(733) 0x3e485 MOV (%RAX,%RBX,4),%EDX |
(733) 0x3e488 MOV 0x4(%RAX,%RBX,4),%ECX |
(733) 0x3e48c CALL 307d0 <_ZNK11qmcplusplus14BsplineFunctorIdE9evaluateVEiiiPKdPd> |
(733) 0x3e491 VMOVSD -0x30(%RBP),%XMM3 |
(733) 0x3e496 MOV 0x98(%R15),%EAX |
(733) 0x3e49d VADDSD %XMM3,%XMM0,%XMM3 |
(733) 0x3e4a1 JMP 3e440 |
0x3e4a3 NOPW %CS:(%RAX,%RAX,1) |
(731) 0x3e4b0 MOV 0x90(%R15),%EAX |
(731) 0x3e4b7 VXORPD %XMM3,%XMM3,%XMM3 |
(731) 0x3e4bb TEST %EAX,%EAX |
(731) 0x3e4bd JLE 3e3c0 |
(731) 0x3e4c3 XOR %R14D,%R14D |
(731) 0x3e4c6 JMP 3e4e3 |
0x3e4c8 NOPL (%RAX,%RAX,1) |
(732) 0x3e4d0 VADDSD %XMM3,%XMM0,%XMM3 |
(732) 0x3e4d4 INC %R14 |
(732) 0x3e4d7 MOVSXD %EAX,%RCX |
(732) 0x3e4da CMP %RCX,%R14 |
(732) 0x3e4dd JGE 3e3c0 |
(732) 0x3e4e3 MOV 0xa0(%R15),%RCX |
(732) 0x3e4ea MOV 0x1c8(%R15),%RDX |
(732) 0x3e4f1 MOV 0x18(%RCX),%RCX |
(732) 0x3e4f5 MOVSXD (%RCX,%R14,4),%RCX |
(732) 0x3e4f9 MOV (%RDX,%RCX,8),%RBX |
(732) 0x3e4fd TEST %RBX,%RBX |
(732) 0x3e500 JE 3e4d4 |
(732) 0x3e502 VMOVSD (%R13,%R14,8),%XMM1 |
(732) 0x3e509 VMOVSD 0x8(%RBX),%XMM2 |
(732) 0x3e50e VXORPD %XMM0,%XMM0,%XMM0 |
(732) 0x3e512 VUCOMISD %XMM1,%XMM2 |
(732) 0x3e516 JBE 3e4d0 |
(732) 0x3e518 VMULSD 0x238(%RBX),%XMM1,%XMM0 |
(732) 0x3e520 LEA -0x50(%RBP),%RDI |
(732) 0x3e524 VMOVSD %XMM3,-0x30(%RBP) |
(732) 0x3e529 CALL 4ea80 <@plt_start@+0x3d0> |
(732) 0x3e52e VMOVUPD 0x18(%RBX),%XMM3 |
(732) 0x3e533 VMOVUPD 0x30(%RBX),%XMM5 |
(732) 0x3e538 VMULSD %XMM0,%XMM0,%XMM1 |
(732) 0x3e53c VCVTTSD2SI -0x50(%RBP),%EAX |
(732) 0x3e541 VMOVUPD 0x28(%RBX),%XMM4 |
(732) 0x3e546 MOV 0x218(%RBX),%RCX |
(732) 0x3e54d VBLENDPD $0x1,0x20(%RBX),%XMM5,%XMM6 |
(732) 0x3e554 VMOVHPD 0x40(%RBX),%XMM3,%XMM3 |
(732) 0x3e559 VMOVHPD 0x50(%RBX),%XMM5,%XMM5 |
(732) 0x3e55e VMULSD %XMM0,%XMM1,%XMM2 |
(732) 0x3e562 VMOVDDUP %XMM0,%XMM0 |
(732) 0x3e566 VUNPCKLPD %XMM1,%XMM2,%XMM8 |
(732) 0x3e56a VUNPCKLPD %XMM2,%XMM1,%XMM7 |
(732) 0x3e56e CLTQ |
(732) 0x3e570 VFMADD231PD %XMM8,%XMM3,%XMM5 |
(732) 0x3e575 VMOVHPD 0x48(%RBX),%XMM4,%XMM3 |
(732) 0x3e57a VFMADD231PD %XMM7,%XMM6,%XMM5 |
(732) 0x3e57f VFMADD213PD %XMM5,%XMM0,%XMM3 |
(732) 0x3e584 VMULPD (%RCX,%RAX,8),%XMM3,%XMM3 |
(732) 0x3e589 VUNPCKLPD %XMM0,%XMM1,%XMM0 |
(732) 0x3e58d VMULPD 0x60(%RBX),%XMM0,%XMM1 |
(732) 0x3e592 VMULPD 0x80(%RBX),%XMM0,%XMM0 |
(732) 0x3e59a VPERMILPD $0x1,%XMM3,%XMM4 |
(732) 0x3e5a0 VADDSD %XMM3,%XMM4,%XMM3 |
(732) 0x3e5a4 VPERMILPD $0x1,%XMM1,%XMM4 |
(732) 0x3e5aa VFMADD231SD 0x58(%RBX),%XMM2,%XMM1 |
(732) 0x3e5b0 VADDSD %XMM4,%XMM1,%XMM1 |
(732) 0x3e5b4 VADDSD 0x70(%RBX),%XMM1,%XMM1 |
(732) 0x3e5b9 VFMADD132SD 0x10(%RCX,%RAX,8),%XMM3,%XMM1 |
(732) 0x3e5c0 VPERMILPD $0x1,%XMM0,%XMM3 |
(732) 0x3e5c6 VFMADD231SD 0x78(%RBX),%XMM2,%XMM0 |
(732) 0x3e5cc VADDSD %XMM3,%XMM0,%XMM0 |
(732) 0x3e5d0 VADDSD 0x90(%RBX),%XMM0,%XMM0 |
(732) 0x3e5d8 VMOVSD -0x30(%RBP),%XMM3 |
(732) 0x3e5dd VFMADD132SD 0x18(%RCX,%RAX,8),%XMM1,%XMM0 |
(732) 0x3e5e4 MOV 0x90(%R15),%EAX |
(732) 0x3e5eb JMP 3e4d0 |
0x3e5f0 ADD $0x28,%RSP |
0x3e5f4 POP %RBX |
0x3e5f5 POP %R12 |
0x3e5f7 POP %R13 |
0x3e5f9 POP %R14 |
0x3e5fb POP %R15 |
0x3e5fd POP %RBP |
0x3e5fe RET |
0x3e5ff INT $0x3 |
Path / |
Source file and lines | OneBodyJastrowRef.h:133-155 |
Module | libqmcwfs.so |
nb instructions | 29 |
nb uops | 24 |
loop length | 91 |
used x86 registers | 11 |
used mmx registers | 0 |
used xmm registers | 0 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 2 |
micro-operation queue | 4.00 cycles |
front end | 4.00 cycles |
ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 1.50 | 1.00 | 1.00 | 1.00 | 1.50 | 1.33 | 1.33 | 1.33 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 |
cycles | 1.50 | 1.00 | 1.00 | 1.00 | 1.50 | 1.33 | 1.33 | 1.33 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 |
Cycles executing div or sqrt instructions | NA |
Front-end | 4.00 |
Dispatch | 1.50 |
Overall L1 | 4.00 |
all | 0% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
all | 11% |
load | 12% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 10% |
Instruction | Nb FU | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
SUB $0x28,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV 0x8(%RDX),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %RSI,-0x38(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %RDX,-0x40(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
CMP (%RDX),%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
JE 3e5f0 <_ZN16miniqmcreference17OneBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE14evaluateRatiosERNS1_18VirtualParticleSetERSt6vectorIdSaIdEE+0x260> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV %RDI,%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %R12D,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
JMP 3e3f2 <_ZN16miniqmcreference17OneBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE14evaluateRatiosERNS1_18VirtualParticleSetERSt6vectorIdSaIdEE+0x62> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
XCHG %AX,%AX | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPW (%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPL (%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
ADD $0x28,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
POP %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
RET | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
INT $0x3 |
Source file and lines | OneBodyJastrowRef.h:133-155 |
Module | libqmcwfs.so |
nb instructions | 29 |
nb uops | 24 |
loop length | 91 |
used x86 registers | 11 |
used mmx registers | 0 |
used xmm registers | 0 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 2 |
micro-operation queue | 4.00 cycles |
front end | 4.00 cycles |
ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 1.50 | 1.00 | 1.00 | 1.00 | 1.50 | 1.33 | 1.33 | 1.33 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 |
cycles | 1.50 | 1.00 | 1.00 | 1.00 | 1.50 | 1.33 | 1.33 | 1.33 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 |
Cycles executing div or sqrt instructions | NA |
Front-end | 4.00 |
Dispatch | 1.50 |
Overall L1 | 4.00 |
all | 0% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
all | 11% |
load | 12% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 10% |
Instruction | Nb FU | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
SUB $0x28,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV 0x8(%RDX),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %RSI,-0x38(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %RDX,-0x40(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
CMP (%RDX),%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
JE 3e5f0 <_ZN16miniqmcreference17OneBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE14evaluateRatiosERNS1_18VirtualParticleSetERSt6vectorIdSaIdEE+0x260> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV %RDI,%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %R12D,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
JMP 3e3f2 <_ZN16miniqmcreference17OneBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE14evaluateRatiosERNS1_18VirtualParticleSetERSt6vectorIdSaIdEE+0x62> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
XCHG %AX,%AX | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPW (%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPL (%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
ADD $0x28,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
POP %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
RET | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
INT $0x3 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼miniqmcreference::OneBodyJastrowRef | 0.01 | 0.01 |
▼Loop 731 - OneBodyJastrowRef.h:134-155 - libqmcwfs.so– | 0 | 0 |
○Loop 733 - OneBodyJastrowRef.h:143-146 - libqmcwfs.so | 0.01 | 0 |
○Loop 732 - OneBodyJastrowRef.h:151-155 - libqmcwfs.so | 0 | 0 |