Function: qmcplusplus::SoaDistanceTableAAOMPTarget<double, 3u, 40>::evaluate(qmcplusplus::ParticleSe ... | Module: exec | Source: SoaDistanceTableAAOMPTarget.h:179-187 [...] | Coverage: 0.11% |
---|
Function: qmcplusplus::SoaDistanceTableAAOMPTarget<double, 3u, 40>::evaluate(qmcplusplus::ParticleSe ... | Module: exec | Source: SoaDistanceTableAAOMPTarget.h:179-187 [...] | Coverage: 0.11% |
---|
/scratch_na/users/xoserete/qaas_runs/171-284-5202/intel/miniqmc/build/miniqmc/src/Numerics/OhmmsPETE/VectorSoAContainer.h: 271 - 273 |
-------------------------------------------------------------------------------- |
271: inline T* restrict data(size_t i) { return myData + i * nGhosts; } |
272: ///return the const pointer of the i-th components |
273: inline const T* restrict data(size_t i) const { return myData + i * nGhosts; } |
/software/compilers/gcc/gcc-13.1.0-full+isl+binutils/include/c++/13.1.0/bits/stl_vector.h: 1124 - 1124 |
-------------------------------------------------------------------------------- |
1124: return *(this->_M_impl._M_start + __n); |
/scratch_na/users/xoserete/qaas_runs/171-284-5202/intel/miniqmc/build/miniqmc/src/Particle/Lattice/ParticleBConds3DSoa.h: 222 - 255 |
-------------------------------------------------------------------------------- |
222: const T x0 = pos[0]; |
223: const T y0 = pos[1]; |
224: const T z0 = pos[2]; |
[...] |
234: #pragma omp simd aligned(temp_r, px, py, pz, dx, dy, dz: QMC_SIMD_ALIGNMENT) |
235: for (int iat = first; iat < last; ++iat) |
236: { |
237: T displ_0 = px[iat] - x0; |
238: T displ_1 = py[iat] - y0; |
239: T displ_2 = pz[iat] - z0; |
240: |
241: T ar_0 = displ_0 * g00 + displ_1 * g10 + displ_2 * g20; |
242: T ar_1 = displ_0 * g01 + displ_1 * g11 + displ_2 * g21; |
243: T ar_2 = displ_0 * g02 + displ_1 * g12 + displ_2 * g22; |
244: |
245: //put them in the box |
246: ar_0 -= round(ar_0); |
247: ar_1 -= round(ar_1); |
248: ar_2 -= round(ar_2); |
249: |
250: //unit2cart |
251: dx[iat] = ar_0 * r00 + ar_1 * r10 + ar_2 * r20; |
252: dy[iat] = ar_0 * r01 + ar_1 * r11 + ar_2 * r21; |
253: dz[iat] = ar_0 * r02 + ar_1 * r12 + ar_2 * r22; |
254: |
255: temp_r[iat] = std::sqrt(dx[iat] * dx[iat] + dy[iat] * dy[iat] + dz[iat] * dz[iat]); |
/software/compilers/gcc/gcc-13.1.0-full+isl+binutils/include/c++/13.1.0/bits/unique_ptr.h: 199 - 199 |
-------------------------------------------------------------------------------- |
199: pointer _M_ptr() const noexcept { return std::get<0>(_M_t); } |
/scratch_na/users/xoserete/qaas_runs/171-284-5202/intel/miniqmc/build/miniqmc/src/Numerics/OhmmsPETE/OhmmsVector.h: 223 - 248 |
-------------------------------------------------------------------------------- |
223: return X[i]; |
[...] |
248: inline pointer data() { return X; } |
/scratch_na/users/xoserete/qaas_runs/171-284-5202/intel/miniqmc/build/miniqmc/src/Particle/SoaDistanceTableAAOMPTarget.h: 179 - 187 |
-------------------------------------------------------------------------------- |
179: inline void evaluate(ParticleSet& P) override |
180: { |
181: ScopedTimer local_timer(evaluate_timer_); |
182: |
183: constexpr T BigR = std::numeric_limits<T>::max(); |
184: for (int iat = 1; iat < num_targets_; ++iat) |
185: DTD_BConds<T, D, SC>::computeDistances(P.R[iat], P.getCoordinates().getAllParticlePos(), distances_[iat].data(), |
186: displacements_[iat], 0, iat, iat); |
187: } |
/scratch_na/users/xoserete/qaas_runs/171-284-5202/intel/miniqmc/build/miniqmc/src/Particle/RealSpacePositionsOMPTarget.h: 163 - 163 |
-------------------------------------------------------------------------------- |
163: const PosVectorSoa& getAllParticlePos() const override { return RSoA_hostview; } |
/scratch_na/users/xoserete/qaas_runs/171-284-5202/intel/miniqmc/build/miniqmc/src/Utilities/NewTimer.h: 242 - 249 |
-------------------------------------------------------------------------------- |
242: ScopeGuard(TIMER& t) : timer(t) { timer.start(); } |
[...] |
249: ~ScopeGuard() { timer.stop(); } |
0x54df70 PUSH %RBP |
0x54df71 MOV %RSP,%RBP |
0x54df74 PUSH %R15 |
0x54df76 PUSH %R14 |
0x54df78 PUSH %R13 |
0x54df7a MOV $0x1,%R13D |
0x54df80 PUSH %R12 |
0x54df82 PUSH %RBX |
0x54df83 MOV %RDI,%RBX |
0x54df86 AND $-0x20,%RSP |
0x54df8a SUB $0x20,%RSP |
0x54df8e MOV 0x278(%RDI),%RDI |
0x54df95 MOV %RSI,0x10(%RSP) |
0x54df9a MOV %RDI,(%RSP) |
0x54df9e CALL 50f9a0 <_ZN11qmcplusplus9TimerTypeINSt6chrono3_V212system_clockEE5startEv> |
0x54dfa3 MOV 0x18(%RBX),%RAX |
0x54dfa7 MOV %RAX,0x8(%RSP) |
0x54dfac CMP $0x1,%RAX |
0x54dfb0 JBE 54e4ef |
0x54dfb6 NOPW %CS:(%RAX,%RAX,1) |
(2358) 0x54dfc0 MOV 0x10(%RSP),%RSI |
(2358) 0x54dfc5 MOV 0x60(%RBX),%R14 |
(2358) 0x54dfc9 LEA (%R13,%R13,4),%RCX |
(2358) 0x54dfce MOV 0x48(%RBX),%RDX |
(2358) 0x54dfd2 SAL $0x3,%RCX |
(2358) 0x54dfd6 MOV 0x278(%RSI),%RDI |
(2358) 0x54dfdd ADD %RCX,%R14 |
(2358) 0x54dfe0 MOV 0x18(%RDX,%RCX,1),%R12 |
(2358) 0x54dfe5 MOV (%RDI),%R8 |
(2358) 0x54dfe8 MOV 0x48(%R8),%R15 |
(2358) 0x54dfec CMP $0x50be60,%R15 |
(2358) 0x54dff3 JNE 54e510 |
(2358) 0x54dff9 LEA 0x40(%RDI),%RAX |
(2358) 0x54dffd MOV 0x10(%RSP),%R11 |
(2358) 0x54e002 LEA (%R13,%R13,2),%R10 |
(2358) 0x54e007 MOV 0x8(%R14),%RSI |
(2358) 0x54e00b MOV 0x18(%RAX),%R9 |
(2358) 0x54e00f MOV 0x18(%R14),%RCX |
(2358) 0x54e013 MOV %R13D,%R14D |
(2358) 0x54e016 MOV 0x40(%R11),%RDI |
(2358) 0x54e01a SAL $0x3,%RSI |
(2358) 0x54e01e LEA (%RDI,%R10,8),%RDX |
(2358) 0x54e022 MOV 0x8(%RAX),%RDI |
(2358) 0x54e026 LEA -0x1(%R13),%EAX |
(2358) 0x54e02a VMOVSD (%RDX),%XMM4 |
(2358) 0x54e02e VMOVSD 0x8(%RDX),%XMM2 |
(2358) 0x54e033 SAL $0x3,%RDI |
(2358) 0x54e037 VMOVSD 0x10(%RDX),%XMM1 |
(2358) 0x54e03c LEA (%RCX,%RSI,1),%RDX |
(2358) 0x54e040 LEA (%R9,%RDI,1),%R8 |
(2358) 0x54e044 ADD %RDX,%RSI |
(2358) 0x54e047 ADD %R8,%RDI |
(2358) 0x54e04a CMP $0x2,%EAX |
(2358) 0x54e04d JBE 54e528 |
(2358) 0x54e053 MOV %R13D,%R15D |
(2358) 0x54e056 VBROADCASTSD 0x42121(%RIP),%YMM6 |
(2358) 0x54e05f VBROADCASTSD 0x3dc58(%RIP),%YMM5 |
(2358) 0x54e068 VBROADCASTSD %XMM4,%YMM9 |
(2358) 0x54e06d SHR $0x2,%R15D |
(2358) 0x54e071 VBROADCASTSD %XMM2,%YMM8 |
(2358) 0x54e076 VBROADCASTSD %XMM1,%YMM7 |
(2358) 0x54e07b XOR %R11D,%R11D |
(2358) 0x54e07e SAL $0x5,%R15 |
(2358) 0x54e082 NOPW (%RAX,%RAX,1) |
(2359) 0x54e088 VMOVAPD (%R8,%R11,1),%YMM10 |
(2359) 0x54e08e VMOVAPD (%R9,%R11,1),%YMM0 |
(2359) 0x54e094 VMOVAPD (%RDI,%R11,1),%YMM11 |
(2359) 0x54e09a VSUBPD %YMM8,%YMM10,%YMM13 |
(2359) 0x54e09f VSUBPD %YMM9,%YMM0,%YMM3 |
(2359) 0x54e0a4 VSUBPD %YMM7,%YMM11,%YMM0 |
(2359) 0x54e0a8 VMULPD 0x180(%RBX){1to4},%YMM13,%YMM14 |
(2359) 0x54e0af VMULPD 0x168(%RBX){1to4},%YMM13,%YMM15 |
(2359) 0x54e0b6 VMULPD 0x198(%RBX){1to4},%YMM13,%YMM10 |
(2359) 0x54e0bd VFMADD231PD 0x178(%RBX){1to4},%YMM3,%YMM14 |
(2359) 0x54e0c4 VMOVAPD %YMM15,%YMM12 |
(2359) 0x54e0c9 VFMADD231PD 0x160(%RBX){1to4},%YMM3,%YMM12 |
(2359) 0x54e0d0 VFMADD132PD 0x190(%RBX){1to4},%YMM10,%YMM3 |
(2359) 0x54e0d7 VMOVAPD %YMM14,%YMM11 |
(2359) 0x54e0dc VMOVAPD %YMM6,%YMM14 |
(2359) 0x54e0e0 VFMADD231PD 0x188(%RBX){1to4},%YMM0,%YMM11 |
(2359) 0x54e0e7 VFMADD231PD 0x170(%RBX){1to4},%YMM0,%YMM12 |
(2359) 0x54e0ee VFMADD132PD 0x1a0(%RBX){1to4},%YMM3,%YMM0 |
(2359) 0x54e0f5 VMOVAPD %YMM6,%YMM3 |
(2359) 0x54e0f9 VPTERNLOGQ $-0x8,%YMM5,%YMM11,%YMM14 |
(2359) 0x54e100 VADDPD %YMM14,%YMM11,%YMM10 |
(2359) 0x54e105 VPTERNLOGQ $-0x8,%YMM5,%YMM12,%YMM3 |
(2359) 0x54e10c VADDPD %YMM3,%YMM12,%YMM13 |
(2359) 0x54e110 VRNDSCALEPD $0x3,%YMM10,%YMM3 |
(2359) 0x54e117 VRNDSCALEPD $0x3,%YMM13,%YMM15 |
(2359) 0x54e11e VMOVAPD %YMM6,%YMM13 |
(2359) 0x54e122 VPTERNLOGQ $-0x8,%YMM5,%YMM0,%YMM13 |
(2359) 0x54e129 VSUBPD %YMM3,%YMM11,%YMM11 |
(2359) 0x54e12d VSUBPD %YMM15,%YMM12,%YMM12 |
(2359) 0x54e132 VADDPD %YMM13,%YMM0,%YMM15 |
(2359) 0x54e137 VMULPD 0x120(%RBX){1to4},%YMM11,%YMM10 |
(2359) 0x54e13e VRNDSCALEPD $0x3,%YMM15,%YMM14 |
(2359) 0x54e145 VFMADD231PD 0x118(%RBX){1to4},%YMM12,%YMM10 |
(2359) 0x54e14c VSUBPD %YMM14,%YMM0,%YMM0 |
(2359) 0x54e151 VFMADD231PD 0x128(%RBX){1to4},%YMM0,%YMM10 |
(2359) 0x54e158 VMOVAPD %YMM10,(%RCX,%R11,1) |
(2359) 0x54e15e VMULPD 0x138(%RBX){1to4},%YMM11,%YMM3 |
(2359) 0x54e165 VFMADD231PD 0x130(%RBX){1to4},%YMM12,%YMM3 |
(2359) 0x54e16c VFMADD231PD 0x140(%RBX){1to4},%YMM0,%YMM3 |
(2359) 0x54e173 VMOVAPD %YMM3,(%RDX,%R11,1) |
(2359) 0x54e179 VMULPD 0x150(%RBX){1to4},%YMM11,%YMM11 |
(2359) 0x54e180 VFMADD231PD 0x148(%RBX){1to4},%YMM12,%YMM11 |
(2359) 0x54e187 VFMADD132PD 0x158(%RBX){1to4},%YMM11,%YMM0 |
(2359) 0x54e18e VMOVAPD %YMM0,(%RSI,%R11,1) |
(2359) 0x54e194 VMOVAPD (%RDX,%R11,1),%YMM13 |
(2359) 0x54e19a VMOVAPD (%RCX,%R11,1),%YMM12 |
(2359) 0x54e1a0 VMULPD %YMM13,%YMM13,%YMM15 |
(2359) 0x54e1a5 VFMADD132PD %YMM12,%YMM15,%YMM12 |
(2359) 0x54e1aa VFMADD132PD %YMM0,%YMM12,%YMM0 |
(2359) 0x54e1af VSQRTPD %YMM0,%YMM14 |
(2359) 0x54e1b3 VMOVAPD %YMM14,(%R12,%R11,1) |
(2359) 0x54e1b9 ADD $0x20,%R11 |
(2359) 0x54e1bd CMP %R11,%R15 |
(2359) 0x54e1c0 JNE 54e088 |
(2358) 0x54e1c6 MOV %R14D,%R10D |
(2358) 0x54e1c9 AND $-0x4,%R10D |
(2358) 0x54e1cd MOV %R10D,%EAX |
(2358) 0x54e1d0 TEST $0x3,%R14B |
(2358) 0x54e1d4 JE 54e4db |
(2358) 0x54e1da SUB %R10D,%R14D |
(2358) 0x54e1dd MOV %R14D,0x1c(%RSP) |
(2358) 0x54e1e2 CMP $0x1,%R14D |
(2358) 0x54e1e6 JE 54e37c |
(2358) 0x54e1ec VMOVAPD (%R8,%R10,8),%XMM5 |
(2358) 0x54e1f2 VMOVAPD (%R9,%R10,8),%XMM6 |
(2358) 0x54e1f8 VMOVDDUP %XMM2,%XMM0 |
(2358) 0x54e1fc VMOVDDUP %XMM4,%XMM7 |
(2358) 0x54e200 VMOVDDUP 0x138(%RBX),%XMM10 |
(2358) 0x54e208 VMOVDDUP 0x130(%RBX),%XMM9 |
(2358) 0x54e210 VMOVDDUP %XMM1,%XMM3 |
(2358) 0x54e214 VMOVDDUP 0x150(%RBX),%XMM13 |
(2358) 0x54e21c VMOVDDUP 0x140(%RBX),%XMM8 |
(2358) 0x54e224 VMOVDDUP 0x148(%RBX),%XMM12 |
(2358) 0x54e22c VMOVDDUP 0x158(%RBX),%XMM11 |
(2358) 0x54e234 LEA (,%R10,8),%R11 |
(2358) 0x54e23c VSUBPD %XMM0,%XMM5,%XMM14 |
(2358) 0x54e240 VSUBPD %XMM7,%XMM6,%XMM15 |
(2358) 0x54e244 VMOVAPD (%RDI,%R10,8),%XMM7 |
(2358) 0x54e24a LEA (%RCX,%R11,1),%R15 |
(2358) 0x54e24e LEA (%RDX,%R11,1),%R14 |
(2358) 0x54e252 VSUBPD %XMM3,%XMM7,%XMM0 |
(2358) 0x54e256 VMULPD 0x168(%RBX){1to2},%XMM14,%XMM3 |
(2358) 0x54e25d VMULPD 0x180(%RBX){1to2},%XMM14,%XMM5 |
(2358) 0x54e264 VMULPD 0x198(%RBX){1to2},%XMM14,%XMM6 |
(2358) 0x54e26b VFMADD231PD 0x160(%RBX){1to2},%XMM15,%XMM3 |
(2358) 0x54e272 VFMADD231PD 0x178(%RBX){1to2},%XMM15,%XMM5 |
(2358) 0x54e279 VFMADD132PD 0x190(%RBX){1to2},%XMM6,%XMM15 |
(2358) 0x54e280 VMOVDDUP 0x41ef8(%RIP),%XMM6 |
(2358) 0x54e288 VFMADD231PD 0x170(%RBX){1to2},%XMM0,%XMM3 |
(2358) 0x54e28f VFMADD231PD 0x188(%RBX){1to2},%XMM0,%XMM5 |
(2358) 0x54e296 VFMADD132PD 0x1a0(%RBX){1to2},%XMM15,%XMM0 |
(2358) 0x54e29d VPTERNLOGQ $-0x8,0x41ec8(%RIP),%XMM3,%XMM6 |
(2358) 0x54e2a8 VADDPD %XMM6,%XMM3,%XMM15 |
(2358) 0x54e2ac VMOVDDUP 0x41ecc(%RIP),%XMM6 |
(2358) 0x54e2b4 VPTERNLOGQ $-0x8,0x41eb1(%RIP),%XMM5,%XMM6 |
(2358) 0x54e2bf VADDPD %XMM6,%XMM5,%XMM7 |
(2358) 0x54e2c3 VMOVDDUP 0x41eb5(%RIP),%XMM6 |
(2358) 0x54e2cb VPTERNLOGQ $-0x8,0x41e9a(%RIP),%XMM0,%XMM6 |
(2358) 0x54e2d6 VRNDSCALEPD $0x3,%XMM15,%XMM14 |
(2358) 0x54e2dd VRNDSCALEPD $0x3,%XMM7,%XMM15 |
(2358) 0x54e2e4 VSUBPD %XMM14,%XMM3,%XMM3 |
(2358) 0x54e2e9 VADDPD %XMM6,%XMM0,%XMM14 |
(2358) 0x54e2ed VSUBPD %XMM15,%XMM5,%XMM5 |
(2358) 0x54e2f2 VRNDSCALEPD $0x3,%XMM14,%XMM7 |
(2358) 0x54e2f9 VMULPD %XMM10,%XMM5,%XMM10 |
(2358) 0x54e2fe VMULPD 0x120(%RBX){1to2},%XMM5,%XMM15 |
(2358) 0x54e305 VFMADD132PD %XMM3,%XMM10,%XMM9 |
(2358) 0x54e30a VSUBPD %XMM7,%XMM0,%XMM0 |
(2358) 0x54e30e VFMADD231PD 0x118(%RBX){1to2},%XMM3,%XMM15 |
(2358) 0x54e315 VFMADD132PD %XMM0,%XMM9,%XMM8 |
(2358) 0x54e31a VMULPD %XMM13,%XMM5,%XMM9 |
(2358) 0x54e31f VFMADD231PD 0x128(%RBX){1to2},%XMM0,%XMM15 |
(2358) 0x54e326 VFMADD132PD %XMM12,%XMM9,%XMM3 |
(2358) 0x54e32b VMOVAPD %XMM15,(%R15) |
(2358) 0x54e330 VMOVAPD %XMM8,(%R14) |
(2358) 0x54e335 VFMADD132PD %XMM11,%XMM3,%XMM0 |
(2358) 0x54e33a VMOVAPD %XMM0,(%RSI,%R11,1) |
(2358) 0x54e340 VMOVAPD (%R14),%XMM12 |
(2358) 0x54e345 VMOVAPD (%R15),%XMM8 |
(2358) 0x54e34a VMULPD %XMM12,%XMM12,%XMM13 |
(2358) 0x54e34f VFMADD132PD %XMM8,%XMM13,%XMM8 |
(2358) 0x54e354 VFMADD132PD %XMM0,%XMM8,%XMM0 |
(2358) 0x54e359 VSQRTPD %XMM0,%XMM11 |
(2358) 0x54e35d VMOVAPD %XMM11,(%R12,%R11,1) |
(2358) 0x54e363 MOV 0x1c(%RSP),%R10D |
(2358) 0x54e368 TEST $0x1,%R10B |
(2358) 0x54e36c JE 54e4db |
(2358) 0x54e372 MOV %R10D,%R11D |
(2358) 0x54e375 AND $-0x2,%R11D |
(2358) 0x54e379 ADD %R11D,%EAX |
(2358) 0x54e37c CLTQ |
(2358) 0x54e37e VMOVSD (%R9,%RAX,8),%XMM3 |
(2358) 0x54e384 LEA (,%RAX,8),%R15 |
(2358) 0x54e38c ADD %R15,%RCX |
(2358) 0x54e38f ADD %R15,%RDX |
(2358) 0x54e392 VSUBSD %XMM4,%XMM3,%XMM5 |
(2358) 0x54e396 VMOVSD (%R8,%RAX,8),%XMM4 |
(2358) 0x54e39c VMOVQ 0x41ddc(%RIP),%XMM3 |
(2358) 0x54e3a4 VSUBSD %XMM2,%XMM4,%XMM6 |
(2358) 0x54e3a8 VMOVSD (%RDI,%RAX,8),%XMM2 |
(2358) 0x54e3ad VSUBSD %XMM1,%XMM2,%XMM14 |
(2358) 0x54e3b1 VMULSD 0x168(%RBX),%XMM6,%XMM1 |
(2358) 0x54e3b9 VMULSD 0x180(%RBX),%XMM6,%XMM0 |
(2358) 0x54e3c1 VMULSD 0x198(%RBX),%XMM6,%XMM7 |
(2358) 0x54e3c9 VFMADD231SD 0x160(%RBX),%XMM5,%XMM1 |
(2358) 0x54e3d2 VFMADD231SD 0x178(%RBX),%XMM5,%XMM0 |
(2358) 0x54e3db VFMADD132SD 0x190(%RBX),%XMM7,%XMM5 |
(2358) 0x54e3e4 VFMADD231SD 0x170(%RBX),%XMM14,%XMM1 |
(2358) 0x54e3ed VFMADD231SD 0x188(%RBX),%XMM14,%XMM0 |
(2358) 0x54e3f6 VFMADD132SD 0x1a0(%RBX),%XMM5,%XMM14 |
(2358) 0x54e3ff VPTERNLOGQ $-0x8,0x3d8b6(%RIP),%XMM1,%XMM3 |
(2358) 0x54e40a VADDSD %XMM3,%XMM1,%XMM15 |
(2358) 0x54e40e VMOVQ 0x41d6a(%RIP),%XMM3 |
(2358) 0x54e416 VPTERNLOGQ $-0x8,0x3d89f(%RIP),%XMM0,%XMM3 |
(2358) 0x54e421 VADDSD %XMM3,%XMM0,%XMM8 |
(2358) 0x54e425 VMOVQ 0x41d53(%RIP),%XMM3 |
(2358) 0x54e42d VRNDSCALESD $0x3,%XMM15,%XMM15,%XMM10 |
(2358) 0x54e434 VSUBSD %XMM10,%XMM1,%XMM9 |
(2358) 0x54e439 VPTERNLOGQ $-0x8,0x3d87c(%RIP),%XMM14,%XMM3 |
(2358) 0x54e444 VRNDSCALESD $0x3,%XMM8,%XMM8,%XMM12 |
(2358) 0x54e44b VSUBSD %XMM12,%XMM0,%XMM13 |
(2358) 0x54e450 VADDSD %XMM3,%XMM14,%XMM11 |
(2358) 0x54e454 VMULSD 0x120(%RBX),%XMM13,%XMM6 |
(2358) 0x54e45c VRNDSCALESD $0x3,%XMM11,%XMM11,%XMM5 |
(2358) 0x54e463 VSUBSD %XMM5,%XMM14,%XMM4 |
(2358) 0x54e467 VFMADD231SD 0x118(%RBX),%XMM9,%XMM6 |
(2358) 0x54e470 VFMADD231SD 0x128(%RBX),%XMM4,%XMM6 |
(2358) 0x54e479 VMOVSD %XMM6,(%RCX) |
(2358) 0x54e47d VMULSD 0x138(%RBX),%XMM13,%XMM2 |
(2358) 0x54e485 VFMADD231SD 0x130(%RBX),%XMM9,%XMM2 |
(2358) 0x54e48e VFMADD231SD 0x140(%RBX),%XMM4,%XMM2 |
(2358) 0x54e497 VMOVSD %XMM2,(%RDX) |
(2358) 0x54e49b VMULSD 0x150(%RBX),%XMM13,%XMM14 |
(2358) 0x54e4a3 VFMADD132SD 0x148(%RBX),%XMM14,%XMM9 |
(2358) 0x54e4ac VFMADD132SD 0x158(%RBX),%XMM9,%XMM4 |
(2358) 0x54e4b5 VMOVSD %XMM4,(%RSI,%R15,1) |
(2358) 0x54e4bb VMOVSD (%RDX),%XMM0 |
(2358) 0x54e4bf VMOVSD (%RCX),%XMM1 |
(2358) 0x54e4c3 VMULSD %XMM0,%XMM0,%XMM7 |
(2358) 0x54e4c7 VFMADD132SD %XMM1,%XMM7,%XMM1 |
(2358) 0x54e4cc VFMADD132SD %XMM4,%XMM1,%XMM4 |
(2358) 0x54e4d1 VSQRTSD %XMM4,%XMM4,%XMM4 |
(2358) 0x54e4d5 VMOVSD %XMM4,(%R12,%RAX,8) |
(2358) 0x54e4db MOV 0x8(%RSP),%R12 |
(2358) 0x54e4e0 INC %R13 |
(2358) 0x54e4e3 CMP %R12,%R13 |
(2358) 0x54e4e6 JB 54dfc0 |
0x54e4ec VZEROUPPER |
0x54e4ef MOV (%RSP),%RDI |
0x54e4f3 CALL 50fbd0 <_ZN11qmcplusplus9TimerTypeINSt6chrono3_V212system_clockEE4stopEv> |
0x54e4f8 LEA -0x28(%RBP),%RSP |
0x54e4fc POP %RBX |
0x54e4fd POP %R12 |
0x54e4ff POP %R13 |
0x54e501 POP %R14 |
0x54e503 POP %R15 |
0x54e505 POP %RBP |
0x54e506 RET |
0x54e507 NOPW (%RAX,%RAX,1) |
(2358) 0x54e510 VZEROUPPER |
(2358) 0x54e513 CALL %R15 |
(2358) 0x54e516 MOV 0x18(%RBX),%R9 |
(2358) 0x54e51a MOV %R9,0x8(%RSP) |
(2358) 0x54e51f JMP 54dffd |
0x54e524 NOPL (%RAX) |
(2358) 0x54e528 XOR %R10D,%R10D |
(2358) 0x54e52b XOR %EAX,%EAX |
(2358) 0x54e52d JMP 54e1da |
0x54e532 MOV %RAX,%RBX |
0x54e535 MOV (%RSP),%RDI |
0x54e539 VZEROUPPER |
0x54e53c CALL 50fbd0 <_ZN11qmcplusplus9TimerTypeINSt6chrono3_V212system_clockEE4stopEv> |
0x54e541 MOV %RBX,%RDI |
0x54e544 CALL 404230 <_Unwind_Resume@plt> |
0x54e549 NOPL (%RAX) |
Path / |
Source file and lines | SoaDistanceTableAAOMPTarget.h:179-187 |
Module | exec |
nb instructions | 40 |
nb uops | 46 |
loop length | 150 |
used x86 registers | 10 |
used mmx registers | 0 |
used xmm registers | 0 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 4 |
micro-operation queue | 7.67 cycles |
front end | 7.67 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 1.00 | 1.00 | 3.67 | 3.67 | 6.50 | 1.00 | 1.00 | 6.50 | 6.50 | 6.50 | 1.00 | 3.67 |
cycles | 1.00 | 1.00 | 3.67 | 3.67 | 6.50 | 1.00 | 1.00 | 6.50 | 6.50 | 6.50 | 1.00 | 3.67 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 6.77 |
Stall cycles | 0.00 |
Front-end | 7.67 |
Dispatch | 6.50 |
Overall L1 | 7.67 |
all | 33% |
load | NA (no load vectorizable/vectorized instructions) |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 66% |
all | 15% |
load | NA (no load vectorizable/vectorized instructions) |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 18% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV $0x1,%R13D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RDI,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
AND $-0x20,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
SUB $0x20,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x278(%RDI),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RSI,0x10(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RDI,(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CALL 50f9a0 <_ZN11qmcplusplus9TimerTypeINSt6chrono3_V212system_clockEE5startEv> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV 0x18(%RBX),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP $0x1,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JBE 54e4ef <_ZN11qmcplusplus27SoaDistanceTableAAOMPTargetIdLj3ELi40EE8evaluateERNS_11ParticleSetE+0x57f> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
MOV (%RSP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CALL 50fbd0 <_ZN11qmcplusplus9TimerTypeINSt6chrono3_V212system_clockEE4stopEv> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RAX,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV (%RSP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
CALL 50fbd0 <_ZN11qmcplusplus9TimerTypeINSt6chrono3_V212system_clockEE4stopEv> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV %RBX,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL 404230 <_Unwind_Resume@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Source file and lines | SoaDistanceTableAAOMPTarget.h:179-187 |
Module | exec |
nb instructions | 40 |
nb uops | 46 |
loop length | 150 |
used x86 registers | 10 |
used mmx registers | 0 |
used xmm registers | 0 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 4 |
micro-operation queue | 7.67 cycles |
front end | 7.67 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 1.00 | 1.00 | 3.67 | 3.67 | 6.50 | 1.00 | 1.00 | 6.50 | 6.50 | 6.50 | 1.00 | 3.67 |
cycles | 1.00 | 1.00 | 3.67 | 3.67 | 6.50 | 1.00 | 1.00 | 6.50 | 6.50 | 6.50 | 1.00 | 3.67 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 6.77 |
Stall cycles | 0.00 |
Front-end | 7.67 |
Dispatch | 6.50 |
Overall L1 | 7.67 |
all | 33% |
load | NA (no load vectorizable/vectorized instructions) |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 66% |
all | 15% |
load | NA (no load vectorizable/vectorized instructions) |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 18% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV $0x1,%R13D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RDI,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
AND $-0x20,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
SUB $0x20,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x278(%RDI),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RSI,0x10(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RDI,(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CALL 50f9a0 <_ZN11qmcplusplus9TimerTypeINSt6chrono3_V212system_clockEE5startEv> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV 0x18(%RBX),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP $0x1,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JBE 54e4ef <_ZN11qmcplusplus27SoaDistanceTableAAOMPTargetIdLj3ELi40EE8evaluateERNS_11ParticleSetE+0x57f> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
MOV (%RSP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CALL 50fbd0 <_ZN11qmcplusplus9TimerTypeINSt6chrono3_V212system_clockEE4stopEv> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RAX,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV (%RSP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
CALL 50fbd0 <_ZN11qmcplusplus9TimerTypeINSt6chrono3_V212system_clockEE4stopEv> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV %RBX,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL 404230 <_Unwind_Resume@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼qmcplusplus::SoaDistanceTableAAOMPTarget | 0.11 | 0.11 |
▼Loop 2358 - SoaDistanceTableAAOMPTarget.h:184-185 - exec– | 0 | 0 |
○Loop 2359 - ParticleBConds3DSoa.h:237-255 - exec | 0.11 | 0.09 |