Loop Id: 322 | Module: exec | Source: TwoBodyJastrowRef.h:107-132 [...] | Coverage: 0.01% |
---|
Loop Id: 322 | Module: exec | Source: TwoBodyJastrowRef.h:107-132 [...] | Coverage: 0.01% |
---|
0x23ee70 VXORPD %XMM14,%XMM14,%XMM14 |
0x23ee75 VMOVSD -0x58(%RBP),%XMM0 |
0x23ee7a VSUBSD %XMM14,%XMM0,%XMM0 |
0x23ee7f VZEROUPPER |
0x23ee82 CALL 2f6930 <@plt_start@+0x360> |
0x23ee87 MOV -0x50(%RBP),%RCX |
0x23ee8b MOV -0x60(%RBP),%R15 |
0x23ee8f MOV -0x40(%RBP),%R14 |
0x23ee93 MOV (%RCX),%RAX |
0x23ee96 VMOVSD %XMM0,(%RAX,%R15,8) |
0x23ee9c INC %R15 |
0x23ee9f MOV 0x8(%RCX),%RCX |
0x23eea3 SUB %RAX,%RCX |
0x23eea6 SAR $0x3,%RCX |
0x23eeaa CMP %R15,%RCX |
0x23eead MOV -0x48(%RBP),%RCX |
0x23eeb1 JBE 23f652 |
0x23eeb7 CMPB $0,0x298(%RCX) |
0x23eebe JE 23f664 |
0x23eec4 MOVSXD 0x2a0(%RCX),%R12 |
0x23eecb MOV 0xd8(%R14),%RAX |
0x23eed2 MOV 0x248(%R14),%ESI |
0x23eed9 MOV 0x290(%RCX),%RBX |
0x23eee0 MOV %RCX,%RDI |
0x23eee3 VMOVSD (%RAX,%R12,8),%XMM0 |
0x23eee9 VMOVSD %XMM0,-0x58(%RBP) |
0x23eeee CALL 27c670 <_ZNK11qmcplusplus11ParticleSet14getDistTableABEi> |
0x23eef3 MOV 0xa0(%R14),%RDI |
0x23eefa MOV %R15,-0x60(%RBP) |
0x23eefe TEST %RDI,%RDI |
0x23ef01 JE 23ee70 |
0x23ef07 MOV 0x48(%RAX),%RAX |
0x23ef0b LEA (%R15,%R15,4),%RDX |
0x23ef0f MOV 0x200(%R14),%R10 |
0x23ef16 MOV 0x1d0(%R14),%RSI |
0x23ef1d VXORPD %XMM14,%XMM14,%XMM14 |
0x23ef22 XOR %R14D,%R14D |
0x23ef25 MOV %R12,-0x90(%RBP) |
0x23ef2c MOV %RDI,-0x88(%RBP) |
0x23ef33 MOV 0x18(%RAX,%RDX,8),%R8 |
0x23ef38 MOV 0x268(%RBX),%RDX |
0x23ef3f MOV 0x18(%RBX),%RAX |
0x23ef43 MOV %R10,-0x70(%RBP) |
0x23ef47 MOV 0x18(%RDX),%R13 |
0x23ef4b MOV (%RAX,%R12,4),%EAX |
0x23ef4f MOV %R8,-0x80(%RBP) |
0x23ef53 MOV (%R13),%ECX |
0x23ef57 IMUL %EDI,%EAX |
0x23ef5a MOV %R13,-0x68(%RBP) |
0x23ef5e MOVSXD %EAX,%R9 |
0x23ef61 MOV %R9,-0x78(%RBP) |
0x23ef65 JMP 23ef7d |
(323) 0x23ef70 VADDSD %XMM0,%XMM14,%XMM14 |
(323) 0x23ef74 CMP %RDI,%R14 |
(323) 0x23ef77 JE 23ee75 |
(323) 0x23ef7d MOV %R14,%RAX |
(323) 0x23ef80 MOV %ECX,%EDX |
(323) 0x23ef82 MOV 0x4(%R13,%RAX,4),%ECX |
(323) 0x23ef87 INC %R14 |
(323) 0x23ef8a VXORPD %XMM0,%XMM0,%XMM0 |
(323) 0x23ef8e MOV %ECX,%EBX |
(323) 0x23ef90 SUB %EDX,%EBX |
(323) 0x23ef92 JLE 23ef70 |
(323) 0x23ef94 ADD %R9,%RAX |
(323) 0x23ef97 MOVSXD %EDX,%RDX |
(323) 0x23ef9a MOV %R12,%R15 |
(323) 0x23ef9d MOV %R14,-0x38(%RBP) |
(323) 0x23efa1 MOV %EBX,%R14D |
(323) 0x23efa4 MOV %ECX,-0x2c(%RBP) |
(323) 0x23efa7 XOR %EBX,%EBX |
(323) 0x23efa9 MOV (%R10,%RAX,8),%RAX |
(323) 0x23efad LEA (%R8,%RDX,8),%R11 |
(323) 0x23efb1 SUB %RDX,%R15 |
(323) 0x23efb4 XOR %EDX,%EDX |
(323) 0x23efb6 VMOVSD 0x8(%RAX),%XMM0 |
(323) 0x23efbb JMP 23efc8 |
(326) 0x23efc0 INC %RDX |
(326) 0x23efc3 CMP %RDX,%R14 |
(326) 0x23efc6 JE 23eff0 |
(326) 0x23efc8 CMP %RDX,%R15 |
(326) 0x23efcb JE 23efc0 |
(326) 0x23efcd VMOVSD (%R11,%RDX,8),%XMM1 |
(326) 0x23efd3 VUCOMISD %XMM0,%XMM1 |
(326) 0x23efd7 JAE 23efc0 |
(326) 0x23efd9 MOVSXD %EBX,%RCX |
(326) 0x23efdc INC %EBX |
(326) 0x23efde VMOVSD %XMM1,(%RSI,%RCX,8) |
(326) 0x23efe3 JMP 23efc0 |
(323) 0x23eff0 MOV -0x2c(%RBP),%ECX |
(323) 0x23eff3 MOV -0x38(%RBP),%R14 |
(323) 0x23eff7 VXORPD %XMM0,%XMM0,%XMM0 |
(323) 0x23effb TEST %EBX,%EBX |
(323) 0x23effd JLE 23ef70 |
(323) 0x23f003 VMOVSD 0x238(%RAX),%XMM15 |
(323) 0x23f00b VMOVUPD 0x18(%RAX),%XMM2 |
(323) 0x23f010 VMOVSD 0x28(%RAX),%XMM16 |
(323) 0x23f017 VMOVSD 0x30(%RAX),%XMM20 |
(323) 0x23f01e VMOVSD 0x48(%RAX),%XMM3 |
(323) 0x23f023 VMOVSD 0x68(%RAX),%XMM4 |
(323) 0x23f028 VMOVSD 0x38(%RAX),%XMM5 |
(323) 0x23f02d VMOVSD 0x40(%RAX),%XMM6 |
(323) 0x23f032 VMOVSD 0x50(%RAX),%XMM9 |
(323) 0x23f037 VMOVSD 0x58(%RAX),%XMM7 |
(323) 0x23f03c VMOVSD 0x60(%RAX),%XMM10 |
(323) 0x23f041 VMOVSD 0x70(%RAX),%XMM11 |
(323) 0x23f046 VMOVSD 0x78(%RAX),%XMM30 |
(323) 0x23f04d VMOVSD 0x80(%RAX),%XMM31 |
(323) 0x23f054 VMOVSD 0x88(%RAX),%XMM17 |
(323) 0x23f05b VMOVSD 0x90(%RAX),%XMM18 |
(323) 0x23f062 MOV 0x218(%RAX),%R11 |
(323) 0x23f069 MOV %EBX,%R15D |
(323) 0x23f06c CMP $0x8,%EBX |
(323) 0x23f06f JAE 23f078 |
(323) 0x23f071 XOR %EBX,%EBX |
(323) 0x23f073 JMP 23f59c |
(323) 0x23f078 VMOVAPD %XMM17,%XMM0 |
(323) 0x23f07e VMOVAPD %XMM18,%XMM1 |
(323) 0x23f084 MOV %R15,%R14 |
(323) 0x23f087 SHR $0x3,%R14 |
(323) 0x23f08b VMOVAPD %XMM30,-0x160(%RBP) |
(323) 0x23f092 VMOVAPD %XMM31,-0x150(%RBP) |
(323) 0x23f099 VBROADCASTSD %XMM15,%YMM17 |
(323) 0x23f09f VBROADCASTSD %XMM2,%YMM18 |
(323) 0x23f0a5 VPERMPD $0x55,%YMM2,%YMM19 |
(323) 0x23f0ac VBROADCASTSD %XMM16,%YMM8 |
(323) 0x23f0b2 VBROADCASTSD %XMM20,%YMM21 |
(323) 0x23f0b8 VBROADCASTSD %XMM5,%YMM22 |
(323) 0x23f0be VBROADCASTSD %XMM6,%YMM23 |
(323) 0x23f0c4 VBROADCASTSD %XMM3,%YMM24 |
(323) 0x23f0ca VMOVAPD %XMM9,-0xd0(%RBP) |
(323) 0x23f0d2 VBROADCASTSD %XMM9,%YMM25 |
(323) 0x23f0d8 VBROADCASTSD %XMM7,%YMM26 |
(323) 0x23f0de VMOVAPD %XMM10,-0xc0(%RBP) |
(323) 0x23f0e6 VBROADCASTSD %XMM10,%YMM27 |
(323) 0x23f0ec VBROADCASTSD %XMM4,%YMM28 |
(323) 0x23f0f2 VBROADCASTSD %XMM11,%YMM29 |
(323) 0x23f0f8 VBROADCASTSD %XMM30,%YMM30 |
(323) 0x23f0fe VBROADCASTSD %XMM31,%YMM31 |
(323) 0x23f104 VBROADCASTSD %XMM0,%YMM9 |
(323) 0x23f109 VBROADCASTSD %XMM1,%YMM10 |
(323) 0x23f10e MOV %R15D,%EBX |
(323) 0x23f111 VMOVAPD %XMM3,-0x110(%RBP) |
(323) 0x23f119 VMOVAPD %XMM4,-0x100(%RBP) |
(323) 0x23f121 AND $-0x8,%EBX |
(323) 0x23f124 VXORPD %XMM4,%XMM4,%XMM4 |
(323) 0x23f128 VXORPD %XMM3,%XMM3,%XMM3 |
(323) 0x23f12c VMOVSD %XMM14,-0x98(%RBP) |
(323) 0x23f134 VMOVAPD %XMM15,-0x190(%RBP) |
(323) 0x23f13c VMOVAPD %XMM16,-0x180(%RBP) |
(323) 0x23f143 VMOVAPD %XMM20,-0x170(%RBP) |
(323) 0x23f14a VMOVAPD %XMM5,-0x120(%RBP) |
(323) 0x23f152 VMOVAPD %XMM6,-0xf0(%RBP) |
(323) 0x23f15a VMOVAPD %XMM7,-0xe0(%RBP) |
(323) 0x23f162 VMOVAPD %XMM11,-0xb0(%RBP) |
(323) 0x23f16a VMOVAPD %XMM0,-0x140(%RBP) |
(323) 0x23f172 VMOVAPD %XMM1,-0x130(%RBP) |
(323) 0x23f17a XOR %EAX,%EAX |
(323) 0x23f17c SAL $0x6,%R14 |
(324) 0x23f180 VMULPD (%RSI,%RAX,1),%YMM17,%YMM11 |
(324) 0x23f187 VMULPD 0x20(%RSI,%RAX,1),%YMM17,%YMM13 |
(324) 0x23f18f VMOVAPD %YMM8,%YMM20 |
(324) 0x23f195 ADD $0x40,%RAX |
(324) 0x23f199 VCVTTPD2DQ %YMM11,%XMM0 |
(324) 0x23f19e VCVTTPD2DQ %YMM13,%XMM1 |
(324) 0x23f1a3 VROUNDPD $0xb,%YMM11,%YMM12 |
(324) 0x23f1a9 VROUNDPD $0xb,%YMM13,%YMM14 |
(324) 0x23f1af VPMOVSXDQ %XMM0,%YMM5 |
(324) 0x23f1b4 VPMOVSXDQ %XMM1,%YMM6 |
(324) 0x23f1b9 VSUBPD %YMM12,%YMM11,%YMM12 |
(324) 0x23f1be VSUBPD %YMM14,%YMM13,%YMM11 |
(324) 0x23f1c3 VPEXTRQ $0x1,%XMM5,%RDX |
(324) 0x23f1c9 VMOVQ %XMM5,%RDI |
(324) 0x23f1ce VEXTRACTI128 $0x1,%YMM5,%XMM5 |
(324) 0x23f1d4 VMOVQ %XMM6,%R8 |
(324) 0x23f1d9 VPEXTRQ $0x1,%XMM6,%RCX |
(324) 0x23f1df VMOVQ %XMM5,%R12 |
(324) 0x23f1e4 VPEXTRQ $0x1,%XMM5,%R13 |
(324) 0x23f1ea VEXTRACTI128 $0x1,%YMM6,%XMM5 |
(324) 0x23f1f0 VMOVSD (%R11,%RDI,8),%XMM6 |
(324) 0x23f1f6 VMOVSD (%R11,%R8,8),%XMM7 |
(324) 0x23f1fc VMULPD %YMM12,%YMM12,%YMM16 |
(324) 0x23f202 VFMADD213PD %YMM21,%YMM11,%YMM20 |
(324) 0x23f208 VMULPD %YMM11,%YMM11,%YMM15 |
(324) 0x23f20d VMOVQ %XMM5,%R9 |
(324) 0x23f212 VPEXTRQ $0x1,%XMM5,%R10 |
(324) 0x23f218 VMOVSD (%R11,%R12,8),%XMM5 |
(324) 0x23f21e VMOVHPD (%R11,%RDX,8),%XMM6,%XMM6 |
(324) 0x23f224 VMOVHPD (%R11,%RCX,8),%XMM7,%XMM7 |
(324) 0x23f22a VMOVHPD (%R11,%R13,8),%XMM5,%XMM5 |
(324) 0x23f230 VMULPD %YMM12,%YMM16,%YMM14 |
(324) 0x23f236 VMULPD %YMM11,%YMM15,%YMM13 |
(324) 0x23f23b VFMADD231PD %YMM19,%YMM15,%YMM20 |
(324) 0x23f241 VFMADD231PD %YMM18,%YMM13,%YMM20 |
(324) 0x23f247 VINSERTF128 $0x1,%XMM5,%YMM6,%YMM5 |
(324) 0x23f24d VMOVSD (%R11,%R9,8),%XMM6 |
(324) 0x23f253 VMOVHPD (%R11,%R10,8),%XMM6,%XMM6 |
(324) 0x23f259 VINSERTF128 $0x1,%XMM6,%YMM7,%YMM6 |
(324) 0x23f25f VMOVAPD %YMM8,%YMM7 |
(324) 0x23f263 VFMADD213PD %YMM21,%YMM12,%YMM7 |
(324) 0x23f269 VFMADD213PD %YMM3,%YMM6,%YMM20 |
(324) 0x23f26f VMOVAPD %YMM24,%YMM6 |
(324) 0x23f275 VFMADD213PD %YMM25,%YMM11,%YMM6 |
(324) 0x23f27b VFMADD231PD %YMM19,%YMM16,%YMM7 |
(324) 0x23f281 VFMADD231PD %YMM23,%YMM15,%YMM6 |
(324) 0x23f287 VFMADD231PD %YMM18,%YMM14,%YMM7 |
(324) 0x23f28d VFMADD231PD %YMM22,%YMM13,%YMM6 |
(324) 0x23f293 VFMADD213PD %YMM4,%YMM5,%YMM7 |
(324) 0x23f298 VPCMPEQD %XMM4,%XMM4,%XMM4 |
(324) 0x23f29c VPSUBD %XMM4,%XMM0,%XMM3 |
(324) 0x23f2a0 VPSUBD %XMM4,%XMM1,%XMM4 |
(324) 0x23f2a4 VPMOVSXDQ %XMM3,%YMM3 |
(324) 0x23f2a9 VPMOVSXDQ %XMM4,%YMM4 |
(324) 0x23f2ae VMOVQ %XMM3,%RCX |
(324) 0x23f2b3 VPEXTRQ $0x1,%XMM3,%RDX |
(324) 0x23f2b9 VEXTRACTI128 $0x1,%YMM3,%XMM3 |
(324) 0x23f2bf VMOVQ %XMM4,%R9 |
(324) 0x23f2c4 VPEXTRQ $0x1,%XMM4,%R10 |
(324) 0x23f2ca VPEXTRQ $0x1,%XMM3,%RDI |
(324) 0x23f2d0 VMOVQ %XMM3,%R8 |
(324) 0x23f2d5 VEXTRACTI128 $0x1,%YMM4,%XMM3 |
(324) 0x23f2db VMOVSD (%R11,%RCX,8),%XMM4 |
(324) 0x23f2e1 VMOVSD (%R11,%R9,8),%XMM5 |
(324) 0x23f2e7 VMOVQ %XMM3,%R12 |
(324) 0x23f2ec VPEXTRQ $0x1,%XMM3,%R13 |
(324) 0x23f2f2 VMOVSD (%R11,%R8,8),%XMM3 |
(324) 0x23f2f8 VMOVHPD (%R11,%RDX,8),%XMM4,%XMM4 |
(324) 0x23f2fe VMOVHPD (%R11,%R10,8),%XMM5,%XMM5 |
(324) 0x23f304 VMOVHPD (%R11,%RDI,8),%XMM3,%XMM3 |
(324) 0x23f30a VINSERTF128 $0x1,%XMM3,%YMM4,%YMM3 |
(324) 0x23f310 VMOVSD (%R11,%R12,8),%XMM4 |
(324) 0x23f316 VMOVHPD (%R11,%R13,8),%XMM4,%XMM4 |
(324) 0x23f31c VINSERTF128 $0x1,%XMM4,%YMM5,%YMM4 |
(324) 0x23f322 VMOVAPD %YMM24,%YMM5 |
(324) 0x23f328 VFMADD213PD %YMM25,%YMM12,%YMM5 |
(324) 0x23f32e VFMADD213PD %YMM20,%YMM4,%YMM6 |
(324) 0x23f334 VMOVAPD %YMM28,%YMM20 |
(324) 0x23f33a VFMADD213PD %YMM29,%YMM11,%YMM20 |
(324) 0x23f340 VFMADD213PD %YMM10,%YMM9,%YMM11 |
(324) 0x23f345 VFMADD231PD %YMM23,%YMM16,%YMM5 |
(324) 0x23f34b VFMADD231PD %YMM27,%YMM15,%YMM20 |
(324) 0x23f351 VFMADD231PD %YMM15,%YMM31,%YMM11 |
(324) 0x23f357 VFMADD231PD %YMM22,%YMM14,%YMM5 |
(324) 0x23f35d VFMADD231PD %YMM26,%YMM13,%YMM20 |
(324) 0x23f363 VFMADD231PD %YMM13,%YMM30,%YMM11 |
(324) 0x23f369 VFMADD213PD %YMM7,%YMM3,%YMM5 |
(324) 0x23f36e VPBROADCASTD -0x3b297(%RIP),%XMM3 |
(324) 0x23f377 VPADDD %XMM3,%XMM0,%XMM4 |
(324) 0x23f37b VPADDD %XMM3,%XMM1,%XMM3 |
(324) 0x23f37f VPMOVSXDQ %XMM4,%YMM4 |
(324) 0x23f384 VPMOVSXDQ %XMM3,%YMM3 |
(324) 0x23f389 VMOVQ %XMM4,%RCX |
(324) 0x23f38e VPEXTRQ $0x1,%XMM4,%RDX |
(324) 0x23f394 VEXTRACTI128 $0x1,%YMM4,%XMM4 |
(324) 0x23f39a VMOVQ %XMM3,%R10 |
(324) 0x23f39f VPEXTRQ $0x1,%XMM3,%R9 |
(324) 0x23f3a5 VEXTRACTI128 $0x1,%YMM3,%XMM3 |
(324) 0x23f3ab VMOVQ %XMM4,%RDI |
(324) 0x23f3b0 VPEXTRQ $0x1,%XMM4,%R8 |
(324) 0x23f3b6 VMOVSD (%R11,%RCX,8),%XMM4 |
(324) 0x23f3bc VMOVQ %XMM3,%R12 |
(324) 0x23f3c1 VPEXTRQ $0x1,%XMM3,%R13 |
(324) 0x23f3c7 VMOVSD (%R11,%R10,8),%XMM7 |
(324) 0x23f3cd VMOVSD (%R11,%RDI,8),%XMM3 |
(324) 0x23f3d3 VMOVHPD (%R11,%RDX,8),%XMM4,%XMM4 |
(324) 0x23f3d9 VMOVHPD (%R11,%R9,8),%XMM7,%XMM7 |
(324) 0x23f3df VMOVHPD (%R11,%R8,8),%XMM3,%XMM3 |
(324) 0x23f3e5 VINSERTF128 $0x1,%XMM3,%YMM4,%YMM3 |
(324) 0x23f3eb VMOVSD (%R11,%R12,8),%XMM4 |
(324) 0x23f3f1 VMOVHPD (%R11,%R13,8),%XMM4,%XMM4 |
(324) 0x23f3f7 VINSERTF128 $0x1,%XMM4,%YMM7,%YMM4 |
(324) 0x23f3fd VMOVAPD %YMM28,%YMM7 |
(324) 0x23f403 VFMADD213PD %YMM29,%YMM12,%YMM7 |
(324) 0x23f409 VFMADD213PD %YMM10,%YMM9,%YMM12 |
(324) 0x23f40e VFMADD213PD %YMM6,%YMM4,%YMM20 |
(324) 0x23f414 VFMADD231PD %YMM27,%YMM16,%YMM7 |
(324) 0x23f41a VFMADD231PD %YMM16,%YMM31,%YMM12 |
(324) 0x23f420 VFMADD231PD %YMM26,%YMM14,%YMM7 |
(324) 0x23f426 VFMADD231PD %YMM14,%YMM30,%YMM12 |
(324) 0x23f42c VFMADD213PD %YMM5,%YMM3,%YMM7 |
(324) 0x23f431 VPBROADCASTD -0x3b372(%RIP),%XMM3 |
(324) 0x23f43a VMOVAPD %YMM12,%YMM4 |
(324) 0x23f43e VPADDD %XMM3,%XMM0,%XMM0 |
(324) 0x23f442 VPADDD %XMM3,%XMM1,%XMM1 |
(324) 0x23f446 VPMOVSXDQ %XMM0,%YMM0 |
(324) 0x23f44b VPMOVSXDQ %XMM1,%YMM1 |
(324) 0x23f450 VMOVQ %XMM0,%RCX |
(324) 0x23f455 VPEXTRQ $0x1,%XMM0,%RDX |
(324) 0x23f45b VEXTRACTI128 $0x1,%YMM0,%XMM0 |
(324) 0x23f461 VMOVQ %XMM1,%R9 |
(324) 0x23f466 VPEXTRQ $0x1,%XMM1,%R10 |
(324) 0x23f46c VPEXTRQ $0x1,%XMM0,%RDI |
(324) 0x23f472 VMOVQ %XMM0,%R8 |
(324) 0x23f477 VEXTRACTI128 $0x1,%YMM1,%XMM0 |
(324) 0x23f47d VMOVSD (%R11,%RCX,8),%XMM1 |
(324) 0x23f483 VMOVSD (%R11,%R9,8),%XMM3 |
(324) 0x23f489 VMOVQ %XMM0,%R12 |
(324) 0x23f48e VPEXTRQ $0x1,%XMM0,%R13 |
(324) 0x23f494 VMOVSD (%R11,%R8,8),%XMM0 |
(324) 0x23f49a VMOVHPD (%R11,%RDX,8),%XMM1,%XMM1 |
(324) 0x23f4a0 VMOVHPD (%R11,%R10,8),%XMM3,%XMM3 |
(324) 0x23f4a6 VMOVHPD (%R11,%RDI,8),%XMM0,%XMM0 |
(324) 0x23f4ac VINSERTF128 $0x1,%XMM0,%YMM1,%YMM0 |
(324) 0x23f4b2 VMOVSD (%R11,%R12,8),%XMM1 |
(324) 0x23f4b8 VMOVHPD (%R11,%R13,8),%XMM1,%XMM1 |
(324) 0x23f4be VFMADD213PD %YMM7,%YMM0,%YMM4 |
(324) 0x23f4c3 VINSERTF128 $0x1,%XMM1,%YMM3,%YMM1 |
(324) 0x23f4c9 VMOVAPD %YMM11,%YMM3 |
(324) 0x23f4cd VFMADD213PD %YMM20,%YMM1,%YMM3 |
(324) 0x23f4d3 CMP %RAX,%R14 |
(324) 0x23f4d6 JNE 23f180 |
(323) 0x23f4dc VADDPD %YMM4,%YMM3,%YMM0 |
(323) 0x23f4e0 VMOVSD -0x98(%RBP),%XMM14 |
(323) 0x23f4e8 VMOVAPD -0x190(%RBP),%XMM15 |
(323) 0x23f4f0 VMOVAPD -0x180(%RBP),%XMM16 |
(323) 0x23f4f7 VMOVAPD -0x170(%RBP),%XMM20 |
(323) 0x23f4fe VMOVAPD -0x160(%RBP),%XMM30 |
(323) 0x23f505 VMOVAPD -0x150(%RBP),%XMM31 |
(323) 0x23f50c VMOVAPD -0x140(%RBP),%XMM17 |
(323) 0x23f513 VMOVAPD -0x130(%RBP),%XMM18 |
(323) 0x23f51a VMOVAPD -0x120(%RBP),%XMM5 |
(323) 0x23f522 VMOVAPD -0x110(%RBP),%XMM3 |
(323) 0x23f52a VMOVAPD -0x100(%RBP),%XMM4 |
(323) 0x23f532 VMOVAPD -0xf0(%RBP),%XMM6 |
(323) 0x23f53a VMOVAPD -0xe0(%RBP),%XMM7 |
(323) 0x23f542 VMOVAPD -0xd0(%RBP),%XMM9 |
(323) 0x23f54a VMOVAPD -0xc0(%RBP),%XMM10 |
(323) 0x23f552 VMOVAPD -0xb0(%RBP),%XMM11 |
(323) 0x23f55a MOV -0x90(%RBP),%R12 |
(323) 0x23f561 MOV -0x88(%RBP),%RDI |
(323) 0x23f568 MOV -0x80(%RBP),%R8 |
(323) 0x23f56c MOV -0x78(%RBP),%R9 |
(323) 0x23f570 MOV -0x70(%RBP),%R10 |
(323) 0x23f574 MOV -0x68(%RBP),%R13 |
(323) 0x23f578 MOV -0x2c(%RBP),%ECX |
(323) 0x23f57b MOV -0x38(%RBP),%R14 |
(323) 0x23f57f VEXTRACTF128 $0x1,%YMM0,%XMM1 |
(323) 0x23f585 VADDPD %XMM1,%XMM0,%XMM0 |
(323) 0x23f589 VPERMILPD $0x1,%XMM0,%XMM1 |
(323) 0x23f58f VADDSD %XMM1,%XMM0,%XMM0 |
(323) 0x23f593 CMP %R15,%RBX |
(323) 0x23f596 JE 23ef70 |
(323) 0x23f59c VUNPCKLPD %XMM4,%XMM3,%XMM1 |
(323) 0x23f5a0 VUNPCKLPD %XMM11,%XMM9,%XMM3 |
(323) 0x23f5a5 VUNPCKLPD %XMM10,%XMM6,%XMM4 |
(323) 0x23f5aa VUNPCKLPD %XMM7,%XMM5,%XMM9 |
(323) 0x23f5ae XCHG %AX,%AX |
(325) 0x23f5b0 VMULSD (%RSI,%RBX,8),%XMM15,%XMM5 |
(325) 0x23f5b5 VMOVAPD %XMM16,%XMM12 |
(325) 0x23f5bb INC %RBX |
(325) 0x23f5be VROUNDSD $0xb,%XMM5,%XMM5,%XMM6 |
(325) 0x23f5c4 VCVTTSD2SI %XMM5,%EAX |
(325) 0x23f5c8 CLTQ |
(325) 0x23f5ca VSUBSD %XMM6,%XMM5,%XMM5 |
(325) 0x23f5ce VMULSD %XMM5,%XMM5,%XMM6 |
(325) 0x23f5d2 VFMADD213SD %XMM20,%XMM5,%XMM12 |
(325) 0x23f5d8 VMULSD %XMM5,%XMM6,%XMM7 |
(325) 0x23f5dc VMOVDDUP %XMM6,%XMM10 |
(325) 0x23f5e0 VMOVDDUP %XMM7,%XMM8 |
(325) 0x23f5e4 VUNPCKLPD %XMM10,%XMM8,%XMM11 |
(325) 0x23f5e9 VMULPD %XMM2,%XMM11,%XMM11 |
(325) 0x23f5ed VPERMILPD $0x1,%XMM11,%XMM13 |
(325) 0x23f5f3 VADDSD %XMM11,%XMM12,%XMM11 |
(325) 0x23f5f8 VADDSD %XMM13,%XMM11,%XMM11 |
(325) 0x23f5fd VMOVDDUP %XMM5,%XMM13 |
(325) 0x23f601 VFMADD213SD %XMM18,%XMM17,%XMM5 |
(325) 0x23f607 VFMADD132SD (%R11,%RAX,8),%XMM0,%XMM11 |
(325) 0x23f60d VFMADD213PD %XMM3,%XMM1,%XMM13 |
(325) 0x23f612 VFMADD231SD %XMM6,%XMM31,%XMM5 |
(325) 0x23f618 VFMADD231PD %XMM10,%XMM4,%XMM13 |
(325) 0x23f61d VFMADD231SD %XMM7,%XMM30,%XMM5 |
(325) 0x23f623 VFMADD231PD %XMM8,%XMM9,%XMM13 |
(325) 0x23f628 VMULPD 0x8(%R11,%RAX,8),%XMM13,%XMM8 |
(325) 0x23f62f VPERMILPD $0x1,%XMM8,%XMM0 |
(325) 0x23f635 VADDSD %XMM0,%XMM8,%XMM0 |
(325) 0x23f639 VADDSD %XMM0,%XMM11,%XMM0 |
(325) 0x23f63d VFMADD231SD 0x18(%R11,%RAX,8),%XMM5,%XMM0 |
(325) 0x23f644 CMP %RBX,%R15 |
(325) 0x23f647 JNE 23f5b0 |
(323) 0x23f64d JMP 23ef70 |
/usr/lib/gcc/x86_64-redhat-linux/11/../../../../include/c++/11/optional: 433 - 950 |
-------------------------------------------------------------------------------- |
433: { return static_cast<const _Dp*>(this)->_M_payload._M_engaged; } |
[...] |
950: if (this->_M_is_engaged()) |
/beegfs/hackathon/users/eoseret/qaas_runs/170-855-3059/intel/miniqmc/build/miniqmc/src/Particle/ParticleSet.h: 313 - 316 |
-------------------------------------------------------------------------------- |
313: inline int first(int igroup) const { return (*group_offsets_)[igroup]; } |
314: |
315: ///return the last index of a group i |
316: inline int last(int igroup) const { return (*group_offsets_)[igroup + 1]; } |
/beegfs/hackathon/users/eoseret/qaas_runs/170-855-3059/intel/miniqmc/build/miniqmc/src/Numerics/OhmmsPETE/OhmmsVector.h: 229 - 229 |
-------------------------------------------------------------------------------- |
229: return X[i]; |
/usr/lib/gcc/x86_64-redhat-linux/11/../../../../include/c++/11/bits/stl_vector.h: 919 - 1064 |
-------------------------------------------------------------------------------- |
919: { return size_type(this->_M_impl._M_finish - this->_M_impl._M_start); } |
[...] |
1046: return *(this->_M_impl._M_start + __n); |
[...] |
1064: return *(this->_M_impl._M_start + __n); |
/usr/lib/gcc/x86_64-redhat-linux/11/../../../../include/c++/11/bits/refwrap.h: 338 - 338 |
-------------------------------------------------------------------------------- |
338: { return *_M_data; } |
/beegfs/hackathon/users/eoseret/qaas_runs/170-855-3059/intel/miniqmc/build/miniqmc/src/QMCWaveFunctions/Jastrow/TwoBodyJastrowRef.h: 107 - 132 |
-------------------------------------------------------------------------------- |
107: for (int k = 0; k < ratios.size(); ++k) |
108: ratios[k] = std::exp(Uat[VP.refPtcl] - computeU(VP.getRefPS(), VP.refPtcl, VP.getDistTableAB(myTableID).getDistRow(k).data())); |
[...] |
126: const int igt = P.GroupID[iat] * NumGroups; |
127: for (int jg = 0; jg < NumGroups; ++jg) |
128: { |
129: const FuncType& f2(*F[igt + jg]); |
130: int iStart = P.first(jg); |
131: int iEnd = P.last(jg); |
132: curUat += f2.evaluateV(iat, iStart, iEnd, dist, DistCompressed.data()); |
/beegfs/hackathon/users/eoseret/qaas_runs/170-855-3059/intel/miniqmc/build/miniqmc/src/QMCWaveFunctions/Jastrow/BsplineFunctor.h: 236 - 260 |
-------------------------------------------------------------------------------- |
236: for (int jat = 0; jat < iLimit; jat++) |
237: { |
238: real_type r = distArray[jat]; |
239: // pick the distances smaller than the cutoff and avoid the reference atom |
240: if (r < cutoff_radius && iStart + jat != iat) |
241: distArrayCompressed[iCount++] = distArray[jat]; |
242: } |
243: |
244: real_type d = 0.0; |
245: //#pragma omp simd reduction(+:d) |
246: for (int jat = 0; jat < iCount; jat++) |
247: { |
248: real_type r = distArrayCompressed[jat]; |
249: r *= DeltaRInv; |
250: int i = (int)r; |
251: real_type t = r - real_type(i); |
252: real_type tp0 = t * t * t; |
253: real_type tp1 = t * t; |
254: real_type tp2 = t; |
255: |
256: real_type d1 = SplineCoefs[i + 0] * (A[0] * tp0 + A[1] * tp1 + A[2] * tp2 + A[3]); |
257: real_type d2 = SplineCoefs[i + 1] * (A[4] * tp0 + A[5] * tp1 + A[6] * tp2 + A[7]); |
258: real_type d3 = SplineCoefs[i + 2] * (A[8] * tp0 + A[9] * tp1 + A[10] * tp2 + A[11]); |
259: real_type d4 = SplineCoefs[i + 3] * (A[12] * tp0 + A[13] * tp1 + A[14] * tp2 + A[15]); |
260: d += (d1 + d2 + d3 + d4); |
Path / |
Metric | Value |
---|---|
CQA speedup if no scalar integer | 4.25 |
CQA speedup if FP arith vectorized | 1.70 |
CQA speedup if fully vectorized | 5.39 |
CQA speedup if no inter-iteration dependency | NA |
CQA speedup if next bottleneck killed | 1.26 |
Bottlenecks | P5, P6, P7, |
Function | _ZN16miniqmcreference17TwoBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE14evaluateRatiosERNS1_18VirtualParticleSetERSt6vectorIdSaIdEE |
Source | optional:433-433,optional:950-950,ParticleSet.h:313-313,OhmmsVector.h:229-229,stl_vector.h:919-919,stl_vector.h:1046-1046,refwrap.h:338-338,TwoBodyJastrowRef.h:107-108,TwoBodyJastrowRef.h:126-127 |
Source loop unroll info | NA |
Source loop unroll confidence level | NA |
Unroll/vectorization loop type | NA |
Unroll factor | NA |
CQA cycles | 11.33 |
CQA cycles if no scalar integer | 2.67 |
CQA cycles if FP arith vectorized | 6.67 |
CQA cycles if fully vectorized | 2.10 |
Front-end cycles | 9.00 |
DIV/SQRT cycles | 3.25 |
P0 cycles | 3.25 |
P1 cycles | 3.25 |
P2 cycles | 3.25 |
P3 cycles | 3.00 |
P4 cycles | 11.33 |
P5 cycles | 11.33 |
P6 cycles | 11.33 |
P7 cycles | 0.00 |
P8 cycles | 0.00 |
P9 cycles | 0.50 |
P10 cycles | 0.50 |
P11 cycles | 1.00 |
P12 cycles | 1.00 |
P13 cycles | 0.00 |
Inter-iter dependencies cycles | NA |
FE+BE cycles (UFS) | NA |
Stall cycles (UFS) | NA |
Nb insns | 52.00 |
Nb uops | 54.00 |
Nb loads | 23.00 |
Nb stores | 9.00 |
Nb stack references | 11.00 |
FLOP/cycle | 0.09 |
Nb FLOP add-sub | 1.00 |
Nb FLOP mul | 0.00 |
Nb FLOP fma | 0.00 |
Nb FLOP div | 0.00 |
Nb FLOP rcp | 0.00 |
Nb FLOP sqrt | 0.00 |
Nb FLOP rsqrt | 0.00 |
Bytes/cycle | 20.56 |
Bytes prefetched | 0.00 |
Bytes loaded | 161.00 |
Bytes stored | 72.00 |
Stride 0 | NA |
Stride 1 | NA |
Stride n | NA |
Stride unknown | NA |
Stride indirect | NA |
Vectorization ratio all | 13.64 |
Vectorization ratio load | 0.00 |
Vectorization ratio store | 0.00 |
Vectorization ratio mul | NA |
Vectorization ratio add_sub | 0.00 |
Vectorization ratio fma | NA |
Vectorization ratio div_sqrt | NA |
Vectorization ratio other | 50.00 |
Vector-efficiency ratio all | 13.42 |
Vector-efficiency ratio load | 10.04 |
Vector-efficiency ratio store | 12.50 |
Vector-efficiency ratio mul | NA |
Vector-efficiency ratio add_sub | 12.50 |
Vector-efficiency ratio fma | NA |
Vector-efficiency ratio div_sqrt | NA |
Vector-efficiency ratio other | 16.93 |
Metric | Value |
---|---|
CQA speedup if no scalar integer | 4.25 |
CQA speedup if FP arith vectorized | 1.70 |
CQA speedup if fully vectorized | 5.39 |
CQA speedup if no inter-iteration dependency | NA |
CQA speedup if next bottleneck killed | 1.26 |
Bottlenecks | P5, P6, P7, |
Function | _ZN16miniqmcreference17TwoBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE14evaluateRatiosERNS1_18VirtualParticleSetERSt6vectorIdSaIdEE |
Source | optional:433-433,optional:950-950,ParticleSet.h:313-313,OhmmsVector.h:229-229,stl_vector.h:919-919,stl_vector.h:1046-1046,refwrap.h:338-338,TwoBodyJastrowRef.h:107-108,TwoBodyJastrowRef.h:126-127 |
Source loop unroll info | NA |
Source loop unroll confidence level | NA |
Unroll/vectorization loop type | NA |
Unroll factor | NA |
CQA cycles | 11.33 |
CQA cycles if no scalar integer | 2.67 |
CQA cycles if FP arith vectorized | 6.67 |
CQA cycles if fully vectorized | 2.10 |
Front-end cycles | 9.00 |
DIV/SQRT cycles | 3.25 |
P0 cycles | 3.25 |
P1 cycles | 3.25 |
P2 cycles | 3.25 |
P3 cycles | 3.00 |
P4 cycles | 11.33 |
P5 cycles | 11.33 |
P6 cycles | 11.33 |
P7 cycles | 0.00 |
P8 cycles | 0.00 |
P9 cycles | 0.50 |
P10 cycles | 0.50 |
P11 cycles | 1.00 |
P12 cycles | 1.00 |
P13 cycles | 0.00 |
Inter-iter dependencies cycles | NA |
FE+BE cycles (UFS) | NA |
Stall cycles (UFS) | NA |
Nb insns | 52.00 |
Nb uops | 54.00 |
Nb loads | 23.00 |
Nb stores | 9.00 |
Nb stack references | 11.00 |
FLOP/cycle | 0.09 |
Nb FLOP add-sub | 1.00 |
Nb FLOP mul | 0.00 |
Nb FLOP fma | 0.00 |
Nb FLOP div | 0.00 |
Nb FLOP rcp | 0.00 |
Nb FLOP sqrt | 0.00 |
Nb FLOP rsqrt | 0.00 |
Bytes/cycle | 20.56 |
Bytes prefetched | 0.00 |
Bytes loaded | 161.00 |
Bytes stored | 72.00 |
Stride 0 | NA |
Stride 1 | NA |
Stride n | NA |
Stride unknown | NA |
Stride indirect | NA |
Vectorization ratio all | 13.64 |
Vectorization ratio load | 0.00 |
Vectorization ratio store | 0.00 |
Vectorization ratio mul | NA |
Vectorization ratio add_sub | 0.00 |
Vectorization ratio fma | NA |
Vectorization ratio div_sqrt | NA |
Vectorization ratio other | 50.00 |
Vector-efficiency ratio all | 13.42 |
Vector-efficiency ratio load | 10.04 |
Vector-efficiency ratio store | 12.50 |
Vector-efficiency ratio mul | NA |
Vector-efficiency ratio add_sub | 12.50 |
Vector-efficiency ratio fma | NA |
Vector-efficiency ratio div_sqrt | NA |
Vector-efficiency ratio other | 16.93 |
Path / |
Function | _ZN16miniqmcreference17TwoBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE14evaluateRatiosERNS1_18VirtualParticleSetERSt6vectorIdSaIdEE |
Source file and lines | TwoBodyJastrowRef.h:107-132 |
Module | exec |
nb instructions | 52 |
nb uops | 54 |
loop length | 247 |
used x86 registers | 14 |
used mmx registers | 0 |
used xmm registers | 2 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 11 |
micro-operation queue | 9.00 cycles |
front end | 9.00 cycles |
ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 3.25 | 3.25 | 3.25 | 3.25 | 3.00 | 11.33 | 11.33 | 11.33 | 0.00 | 0.00 | 0.50 | 0.50 | 1.00 | 1.00 |
cycles | 3.25 | 3.25 | 3.25 | 3.25 | 3.00 | 11.33 | 11.33 | 11.33 | 0.00 | 0.00 | 0.50 | 0.50 | 1.00 | 1.00 |
Cycles executing div or sqrt instructions | NA |
Front-end | 9.00 |
Dispatch | 11.33 |
Overall L1 | 11.33 |
all | 6% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 25% |
all | 28% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 100% |
all | 13% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 50% |
all | 12% |
load | 9% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 12% |
all | 16% |
load | 12% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 25% |
all | 13% |
load | 10% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 16% |
Instruction | Nb FU | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
VXORPD %XMM14,%XMM14,%XMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVSD -0x58(%RBP),%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VSUBSD %XMM14,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 |
VZEROUPPER | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
CALL 2f6930 <@plt_start@+0x360> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0x50(%RBP),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV -0x60(%RBP),%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV -0x40(%RBP),%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV (%RCX),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
VMOVSD %XMM0,(%RAX,%R15,8) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 |
INC %R15 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV 0x8(%RCX),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
SUB %RAX,%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SAR $0x3,%RCX | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
CMP %R15,%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV -0x48(%RBP),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
JBE 23f652 <_ZN16miniqmcreference17TwoBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE14evaluateRatiosERNS1_18VirtualParticleSetERSt6vectorIdSaIdEE+0x822> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
CMPB $0,0x298(%RCX) | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
JE 23f664 <_ZN16miniqmcreference17TwoBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE14evaluateRatiosERNS1_18VirtualParticleSetERSt6vectorIdSaIdEE+0x834> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOVSXD 0x2a0(%RCX),%R12 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
MOV 0xd8(%R14),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x248(%R14),%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x290(%RCX),%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %RCX,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVSD (%RAX,%R12,8),%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVSD %XMM0,-0x58(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 |
CALL 27c670 <_ZNK11qmcplusplus11ParticleSet14getDistTableABEi> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV 0xa0(%R14),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %R15,-0x60(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
TEST %RDI,%RDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JE 23ee70 <_ZN16miniqmcreference17TwoBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE14evaluateRatiosERNS1_18VirtualParticleSetERSt6vectorIdSaIdEE+0x40> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV 0x48(%RAX),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
LEA (%R15,%R15,4),%RDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV 0x200(%R14),%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x1d0(%R14),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
VXORPD %XMM14,%XMM14,%XMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %R14D,%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV %R12,-0x90(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %RDI,-0x88(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV 0x18(%RAX,%RDX,8),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x268(%RBX),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x18(%RBX),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %R10,-0x70(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV 0x18(%RDX),%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV (%RAX,%R12,4),%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %R8,-0x80(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV (%R13),%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
IMUL %EDI,%EAX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV %R13,-0x68(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOVSXD %EAX,%R9 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %R9,-0x78(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
JMP 23ef7d <_ZN16miniqmcreference17TwoBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE14evaluateRatiosERNS1_18VirtualParticleSetERSt6vectorIdSaIdEE+0x14d> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
Function | _ZN16miniqmcreference17TwoBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE14evaluateRatiosERNS1_18VirtualParticleSetERSt6vectorIdSaIdEE |
Source file and lines | TwoBodyJastrowRef.h:107-132 |
Module | exec |
nb instructions | 52 |
nb uops | 54 |
loop length | 247 |
used x86 registers | 14 |
used mmx registers | 0 |
used xmm registers | 2 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 11 |
micro-operation queue | 9.00 cycles |
front end | 9.00 cycles |
ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 3.25 | 3.25 | 3.25 | 3.25 | 3.00 | 11.33 | 11.33 | 11.33 | 0.00 | 0.00 | 0.50 | 0.50 | 1.00 | 1.00 |
cycles | 3.25 | 3.25 | 3.25 | 3.25 | 3.00 | 11.33 | 11.33 | 11.33 | 0.00 | 0.00 | 0.50 | 0.50 | 1.00 | 1.00 |
Cycles executing div or sqrt instructions | NA |
Front-end | 9.00 |
Dispatch | 11.33 |
Overall L1 | 11.33 |
all | 6% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 25% |
all | 28% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 100% |
all | 13% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 50% |
all | 12% |
load | 9% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 12% |
all | 16% |
load | 12% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 25% |
all | 13% |
load | 10% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 16% |
Instruction | Nb FU | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
VXORPD %XMM14,%XMM14,%XMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVSD -0x58(%RBP),%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VSUBSD %XMM14,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 |
VZEROUPPER | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
CALL 2f6930 <@plt_start@+0x360> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0x50(%RBP),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV -0x60(%RBP),%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV -0x40(%RBP),%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV (%RCX),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
VMOVSD %XMM0,(%RAX,%R15,8) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 |
INC %R15 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV 0x8(%RCX),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
SUB %RAX,%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SAR $0x3,%RCX | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
CMP %R15,%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV -0x48(%RBP),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
JBE 23f652 <_ZN16miniqmcreference17TwoBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE14evaluateRatiosERNS1_18VirtualParticleSetERSt6vectorIdSaIdEE+0x822> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
CMPB $0,0x298(%RCX) | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
JE 23f664 <_ZN16miniqmcreference17TwoBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE14evaluateRatiosERNS1_18VirtualParticleSetERSt6vectorIdSaIdEE+0x834> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOVSXD 0x2a0(%RCX),%R12 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
MOV 0xd8(%R14),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x248(%R14),%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x290(%RCX),%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %RCX,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVSD (%RAX,%R12,8),%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVSD %XMM0,-0x58(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 |
CALL 27c670 <_ZNK11qmcplusplus11ParticleSet14getDistTableABEi> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV 0xa0(%R14),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %R15,-0x60(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
TEST %RDI,%RDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JE 23ee70 <_ZN16miniqmcreference17TwoBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE14evaluateRatiosERNS1_18VirtualParticleSetERSt6vectorIdSaIdEE+0x40> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV 0x48(%RAX),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
LEA (%R15,%R15,4),%RDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV 0x200(%R14),%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x1d0(%R14),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
VXORPD %XMM14,%XMM14,%XMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %R14D,%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV %R12,-0x90(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %RDI,-0x88(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV 0x18(%RAX,%RDX,8),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x268(%RBX),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x18(%RBX),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %R10,-0x70(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV 0x18(%RDX),%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV (%RAX,%R12,4),%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %R8,-0x80(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV (%R13),%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
IMUL %EDI,%EAX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV %R13,-0x68(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOVSXD %EAX,%R9 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %R9,-0x78(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
JMP 23ef7d <_ZN16miniqmcreference17TwoBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE14evaluateRatiosERNS1_18VirtualParticleSetERSt6vectorIdSaIdEE+0x14d> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |