Function: _ZN16miniqmcreference17OneBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE14evaluateRati ... | Module: libqmcwfs.so | Source: OneBodyJastrowRef.h:133-155 [...] | Coverage: 0.01% |
---|
Function: _ZN16miniqmcreference17OneBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE14evaluateRati ... | Module: libqmcwfs.so | Source: OneBodyJastrowRef.h:133-155 [...] | Coverage: 0.01% |
---|
/beegfs/hackathon/users/eoseret/qaas_runs/170-855-3059/intel/miniqmc/build/miniqmc/src/Particle/ParticleSet.h: 313 - 316 |
-------------------------------------------------------------------------------- |
313: inline int first(int igroup) const { return (*group_offsets_)[igroup]; } |
314: |
315: ///return the last index of a group i |
316: inline int last(int igroup) const { return (*group_offsets_)[igroup + 1]; } |
/beegfs/hackathon/users/eoseret/qaas_runs/170-855-3059/intel/miniqmc/build/miniqmc/src/QMCWaveFunctions/Jastrow/OneBodyJastrowRef.h: 133 - 155 |
-------------------------------------------------------------------------------- |
133: { |
134: for (int k = 0; k < ratios.size(); ++k) |
135: ratios[k] = std::exp(Vat[VP.refPtcl] - computeU(VP.getDistTableAB(myTableID).getDistRow(k).data())); |
136: } |
137: |
138: inline valT computeU(const valT* dist) |
139: { |
140: valT curVat(0); |
141: if (NumGroups > 0) |
142: { |
143: for (int jg = 0; jg < NumGroups; ++jg) |
144: { |
145: if (F[jg] != nullptr) |
146: curVat += F[jg]->evaluateV(-1, Ions.first(jg), Ions.last(jg), dist, DistCompressed.data()); |
147: } |
148: } |
149: else |
150: { |
151: for (int c = 0; c < Nions; ++c) |
152: { |
153: int gid = Ions.GroupID[c]; |
154: if (F[gid] != nullptr) |
155: curVat += F[gid]->evaluate(dist[c]); |
/beegfs/hackathon/users/eoseret/qaas_runs/170-855-3059/intel/miniqmc/build/miniqmc/src/Numerics/OhmmsPETE/OhmmsVector.h: 223 - 249 |
-------------------------------------------------------------------------------- |
223: return X[i]; |
[...] |
229: return X[i]; |
[...] |
249: inline const_pointer data() const { return X; } |
/usr/lib/gcc/x86_64-redhat-linux/11/../../../../include/c++/11/bits/shared_ptr_base.h: 1296 - 1296 |
-------------------------------------------------------------------------------- |
1296: { return _M_ptr; } |
/usr/lib/gcc/x86_64-redhat-linux/11/../../../../include/c++/11/bits/stl_vector.h: 919 - 1169 |
-------------------------------------------------------------------------------- |
919: { return size_type(this->_M_impl._M_finish - this->_M_impl._M_start); } |
[...] |
1046: return *(this->_M_impl._M_start + __n); |
[...] |
1064: return *(this->_M_impl._M_start + __n); |
[...] |
1169: { return _M_data_ptr(this->_M_impl._M_start); } |
/beegfs/hackathon/users/eoseret/qaas_runs/170-855-3059/intel/miniqmc/build/miniqmc/src/QMCWaveFunctions/Jastrow/BsplineFunctor.h: 166 - 182 |
-------------------------------------------------------------------------------- |
166: if (r >= cutoff_radius) |
167: return 0.0; |
168: r *= DeltaRInv; |
169: real_type ipart, t; |
170: t = std::modf(r, &ipart); |
171: int i = (int)ipart; |
172: real_type tp[4]; |
173: tp[0] = t * t * t; |
[...] |
179: (SplineCoefs[i+0]*(A[ 0]*tp[0] + A[ 1]*tp[1] + A[ 2]*tp[2] + A[ 3]*tp[3])+ |
180: SplineCoefs[i+1]*(A[ 4]*tp[0] + A[ 5]*tp[1] + A[ 6]*tp[2] + A[ 7]*tp[3])+ |
181: SplineCoefs[i+2]*(A[ 8]*tp[0] + A[ 9]*tp[1] + A[10]*tp[2] + A[11]*tp[3])+ |
182: SplineCoefs[i+3]*(A[12]*tp[0] + A[13]*tp[1] + A[14]*tp[2] + A[15]*tp[3])); |
0x3e680 PUSH %RBP |
0x3e681 MOV %RSP,%RBP |
0x3e684 PUSH %R15 |
0x3e686 PUSH %R14 |
0x3e688 PUSH %R13 |
0x3e68a PUSH %R12 |
0x3e68c PUSH %RBX |
0x3e68d SUB $0x28,%RSP |
0x3e691 MOV 0x8(%RDX),%RAX |
0x3e695 MOV %RSI,-0x38(%RBP) |
0x3e699 MOV %RDX,-0x40(%RBP) |
0x3e69d CMP (%RDX),%RAX |
0x3e6a0 JE 3e8e0 |
0x3e6a6 MOV %RDI,%R15 |
0x3e6a9 XOR %R12D,%R12D |
0x3e6ac JMP 3e6e2 |
0x3e6ae XCHG %AX,%AX |
(731) 0x3e6b0 VMOVSD -0x48(%RBP),%XMM0 |
(731) 0x3e6b5 VSUBSD %XMM3,%XMM0,%XMM0 |
(731) 0x3e6b9 CALL 4efe0 <@plt_start@+0x640> |
(731) 0x3e6be MOV -0x40(%RBP),%RCX |
(731) 0x3e6c2 MOV (%RCX),%RAX |
(731) 0x3e6c5 VMOVSD %XMM0,(%RAX,%R12,8) |
(731) 0x3e6cb INC %R12 |
(731) 0x3e6ce MOV 0x8(%RCX),%RCX |
(731) 0x3e6d2 SUB %RAX,%RCX |
(731) 0x3e6d5 SAR $0x3,%RCX |
(731) 0x3e6d9 CMP %R12,%RCX |
(731) 0x3e6dc JBE 3e8e0 |
(731) 0x3e6e2 MOV -0x38(%RBP),%RDI |
(731) 0x3e6e6 MOV 0xf0(%R15),%RCX |
(731) 0x3e6ed MOV 0xa8(%R15),%ESI |
(731) 0x3e6f4 MOVSXD 0x2a0(%RDI),%RAX |
(731) 0x3e6fb VMOVSD (%RCX,%RAX,8),%XMM0 |
(731) 0x3e700 VMOVSD %XMM0,-0x48(%RBP) |
(731) 0x3e705 CALL 4ed60 <@plt_start@+0x3c0> |
(731) 0x3e70a MOV 0x48(%RAX),%RAX |
(731) 0x3e70e LEA (%R12,%R12,4),%RCX |
(731) 0x3e712 MOV 0x18(%RAX,%RCX,8),%R13 |
(731) 0x3e717 MOV 0x98(%R15),%EAX |
(731) 0x3e71e TEST %EAX,%EAX |
(731) 0x3e720 JLE 3e7a0 |
(731) 0x3e722 VXORPD %XMM3,%XMM3,%XMM3 |
(731) 0x3e726 XOR %EBX,%EBX |
(731) 0x3e728 JMP 3e73f |
0x3e72a NOPW (%RAX,%RAX,1) |
(733) 0x3e730 MOVSXD %EAX,%RCX |
(733) 0x3e733 INC %RBX |
(733) 0x3e736 CMP %RCX,%RBX |
(733) 0x3e739 JGE 3e6b0 |
(733) 0x3e73f MOV 0x1c8(%R15),%RCX |
(733) 0x3e746 MOV (%RCX,%RBX,8),%RDI |
(733) 0x3e74a TEST %RDI,%RDI |
(733) 0x3e74d JE 3e730 |
(733) 0x3e74f MOV 0xa0(%R15),%RAX |
(733) 0x3e756 MOV 0x148(%R15),%R9 |
(733) 0x3e75d MOV $-0x1,%ESI |
(733) 0x3e762 MOV %R13,%R8 |
(733) 0x3e765 VMOVSD %XMM3,-0x30(%RBP) |
(733) 0x3e76a MOV 0x268(%RAX),%RAX |
(733) 0x3e771 MOV 0x18(%RAX),%RAX |
(733) 0x3e775 MOV (%RAX,%RBX,4),%EDX |
(733) 0x3e778 MOV 0x4(%RAX,%RBX,4),%ECX |
(733) 0x3e77c CALL 30ac0 <_ZNK11qmcplusplus14BsplineFunctorIdE9evaluateVEiiiPKdPd> |
(733) 0x3e781 VMOVSD -0x30(%RBP),%XMM3 |
(733) 0x3e786 MOV 0x98(%R15),%EAX |
(733) 0x3e78d VADDSD %XMM3,%XMM0,%XMM3 |
(733) 0x3e791 JMP 3e730 |
0x3e793 NOPW %CS:(%RAX,%RAX,1) |
(731) 0x3e7a0 MOV 0x90(%R15),%EAX |
(731) 0x3e7a7 VXORPD %XMM3,%XMM3,%XMM3 |
(731) 0x3e7ab TEST %EAX,%EAX |
(731) 0x3e7ad JLE 3e6b0 |
(731) 0x3e7b3 XOR %R14D,%R14D |
(731) 0x3e7b6 JMP 3e7d3 |
0x3e7b8 NOPL (%RAX,%RAX,1) |
(732) 0x3e7c0 VADDSD %XMM3,%XMM0,%XMM3 |
(732) 0x3e7c4 INC %R14 |
(732) 0x3e7c7 MOVSXD %EAX,%RCX |
(732) 0x3e7ca CMP %RCX,%R14 |
(732) 0x3e7cd JGE 3e6b0 |
(732) 0x3e7d3 MOV 0xa0(%R15),%RCX |
(732) 0x3e7da MOV 0x1c8(%R15),%RDX |
(732) 0x3e7e1 MOV 0x18(%RCX),%RCX |
(732) 0x3e7e5 MOVSXD (%RCX,%R14,4),%RCX |
(732) 0x3e7e9 MOV (%RDX,%RCX,8),%RBX |
(732) 0x3e7ed TEST %RBX,%RBX |
(732) 0x3e7f0 JE 3e7c4 |
(732) 0x3e7f2 VMOVSD (%R13,%R14,8),%XMM1 |
(732) 0x3e7f9 VMOVSD 0x8(%RBX),%XMM2 |
(732) 0x3e7fe VXORPD %XMM0,%XMM0,%XMM0 |
(732) 0x3e802 VUCOMISD %XMM1,%XMM2 |
(732) 0x3e806 JBE 3e7c0 |
(732) 0x3e808 VMULSD 0x238(%RBX),%XMM1,%XMM0 |
(732) 0x3e810 LEA -0x50(%RBP),%RDI |
(732) 0x3e814 VMOVSD %XMM3,-0x30(%RBP) |
(732) 0x3e819 CALL 4ed70 <@plt_start@+0x3d0> |
(732) 0x3e81e VMOVUPD 0x18(%RBX),%XMM3 |
(732) 0x3e823 VMOVUPD 0x30(%RBX),%XMM5 |
(732) 0x3e828 VMULSD %XMM0,%XMM0,%XMM1 |
(732) 0x3e82c VCVTTSD2SI -0x50(%RBP),%EAX |
(732) 0x3e831 VMOVUPD 0x28(%RBX),%XMM4 |
(732) 0x3e836 MOV 0x218(%RBX),%RCX |
(732) 0x3e83d VBLENDPD $0x1,0x20(%RBX),%XMM5,%XMM6 |
(732) 0x3e844 VMOVHPD 0x40(%RBX),%XMM3,%XMM3 |
(732) 0x3e849 VMOVHPD 0x50(%RBX),%XMM5,%XMM5 |
(732) 0x3e84e VMULSD %XMM0,%XMM1,%XMM2 |
(732) 0x3e852 VMOVDDUP %XMM0,%XMM0 |
(732) 0x3e856 VUNPCKLPD %XMM1,%XMM2,%XMM8 |
(732) 0x3e85a VUNPCKLPD %XMM2,%XMM1,%XMM7 |
(732) 0x3e85e CLTQ |
(732) 0x3e860 VFMADD231PD %XMM8,%XMM3,%XMM5 |
(732) 0x3e865 VMOVHPD 0x48(%RBX),%XMM4,%XMM3 |
(732) 0x3e86a VFMADD231PD %XMM7,%XMM6,%XMM5 |
(732) 0x3e86f VFMADD213PD %XMM5,%XMM0,%XMM3 |
(732) 0x3e874 VMULPD (%RCX,%RAX,8),%XMM3,%XMM3 |
(732) 0x3e879 VUNPCKLPD %XMM0,%XMM1,%XMM0 |
(732) 0x3e87d VMULPD 0x60(%RBX),%XMM0,%XMM1 |
(732) 0x3e882 VMULPD 0x80(%RBX),%XMM0,%XMM0 |
(732) 0x3e88a VPERMILPD $0x1,%XMM3,%XMM4 |
(732) 0x3e890 VADDSD %XMM3,%XMM4,%XMM3 |
(732) 0x3e894 VPERMILPD $0x1,%XMM1,%XMM4 |
(732) 0x3e89a VFMADD231SD 0x58(%RBX),%XMM2,%XMM1 |
(732) 0x3e8a0 VADDSD %XMM4,%XMM1,%XMM1 |
(732) 0x3e8a4 VADDSD 0x70(%RBX),%XMM1,%XMM1 |
(732) 0x3e8a9 VFMADD132SD 0x10(%RCX,%RAX,8),%XMM3,%XMM1 |
(732) 0x3e8b0 VPERMILPD $0x1,%XMM0,%XMM3 |
(732) 0x3e8b6 VFMADD231SD 0x78(%RBX),%XMM2,%XMM0 |
(732) 0x3e8bc VADDSD %XMM3,%XMM0,%XMM0 |
(732) 0x3e8c0 VADDSD 0x90(%RBX),%XMM0,%XMM0 |
(732) 0x3e8c8 VMOVSD -0x30(%RBP),%XMM3 |
(732) 0x3e8cd VFMADD132SD 0x18(%RCX,%RAX,8),%XMM1,%XMM0 |
(732) 0x3e8d4 MOV 0x90(%R15),%EAX |
(732) 0x3e8db JMP 3e7c0 |
0x3e8e0 ADD $0x28,%RSP |
0x3e8e4 POP %RBX |
0x3e8e5 POP %R12 |
0x3e8e7 POP %R13 |
0x3e8e9 POP %R14 |
0x3e8eb POP %R15 |
0x3e8ed POP %RBP |
0x3e8ee RET |
0x3e8ef INT $0x3 |
Path / |
Source file and lines | OneBodyJastrowRef.h:133-155 |
Module | libqmcwfs.so |
nb instructions | 29 |
nb uops | 24 |
loop length | 91 |
used x86 registers | 11 |
used mmx registers | 0 |
used xmm registers | 0 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 2 |
micro-operation queue | 4.00 cycles |
front end | 4.00 cycles |
ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 1.50 | 1.00 | 1.00 | 1.00 | 1.50 | 1.33 | 1.33 | 1.33 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 |
cycles | 1.50 | 1.00 | 1.00 | 1.00 | 1.50 | 1.33 | 1.33 | 1.33 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 |
Cycles executing div or sqrt instructions | NA |
Front-end | 4.00 |
Dispatch | 1.50 |
Overall L1 | 4.00 |
all | 0% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
all | 11% |
load | 12% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 10% |
Instruction | Nb FU | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
SUB $0x28,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV 0x8(%RDX),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %RSI,-0x38(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %RDX,-0x40(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
CMP (%RDX),%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
JE 3e8e0 <_ZN16miniqmcreference17OneBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE14evaluateRatiosERNS1_18VirtualParticleSetERSt6vectorIdSaIdEE+0x260> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV %RDI,%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %R12D,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
JMP 3e6e2 <_ZN16miniqmcreference17OneBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE14evaluateRatiosERNS1_18VirtualParticleSetERSt6vectorIdSaIdEE+0x62> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
XCHG %AX,%AX | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPW (%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPL (%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
ADD $0x28,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
POP %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
RET | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
INT $0x3 |
Source file and lines | OneBodyJastrowRef.h:133-155 |
Module | libqmcwfs.so |
nb instructions | 29 |
nb uops | 24 |
loop length | 91 |
used x86 registers | 11 |
used mmx registers | 0 |
used xmm registers | 0 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 2 |
micro-operation queue | 4.00 cycles |
front end | 4.00 cycles |
ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 1.50 | 1.00 | 1.00 | 1.00 | 1.50 | 1.33 | 1.33 | 1.33 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 |
cycles | 1.50 | 1.00 | 1.00 | 1.00 | 1.50 | 1.33 | 1.33 | 1.33 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 |
Cycles executing div or sqrt instructions | NA |
Front-end | 4.00 |
Dispatch | 1.50 |
Overall L1 | 4.00 |
all | 0% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
all | 11% |
load | 12% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 10% |
Instruction | Nb FU | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
SUB $0x28,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV 0x8(%RDX),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %RSI,-0x38(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %RDX,-0x40(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
CMP (%RDX),%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
JE 3e8e0 <_ZN16miniqmcreference17OneBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE14evaluateRatiosERNS1_18VirtualParticleSetERSt6vectorIdSaIdEE+0x260> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV %RDI,%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %R12D,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
JMP 3e6e2 <_ZN16miniqmcreference17OneBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE14evaluateRatiosERNS1_18VirtualParticleSetERSt6vectorIdSaIdEE+0x62> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
XCHG %AX,%AX | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPW (%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPL (%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
ADD $0x28,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
POP %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
RET | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
INT $0x3 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼_ZN16miniqmcreference17OneBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE14evaluateRatiosERNS1_18VirtualParticleSetERSt6vectorIdSaIdEE– | 0.01 | 0.03 |
▼Loop 731 - OneBodyJastrowRef.h:134-155 - libqmcwfs.so– | 0.01 | 0.02 |
○Loop 732 - OneBodyJastrowRef.h:151-155 - libqmcwfs.so | 0 | 0 |
○Loop 733 - OneBodyJastrowRef.h:143-146 - libqmcwfs.so | 0 | 0.01 |