Loop Id: 227 | Module: exec | Source: OneBodyJastrowRef.h:134-155 [...] | Coverage: 0.02% |
---|
Loop Id: 227 | Module: exec | Source: OneBodyJastrowRef.h:134-155 [...] | Coverage: 0.02% |
---|
0x238030 VMOVSD -0x48(%RBP),%XMM0 |
0x238035 VSUBSD %XMM14,%XMM0,%XMM0 |
0x23803a VZEROUPPER |
0x23803d CALL 2f6670 <@plt_start@+0x360> |
0x238042 MOV -0x40(%RBP),%RCX |
0x238046 MOV -0x50(%RBP),%RBX |
0x23804a MOV (%RCX),%RAX |
0x23804d VMOVSD %XMM0,(%RAX,%RBX,8) |
0x238052 INC %RBX |
0x238055 MOV 0x8(%RCX),%RCX |
0x238059 SUB %RAX,%RCX |
0x23805c SAR $0x3,%RCX |
0x238060 CMP %RBX,%RCX |
0x238063 JBE 238928 |
0x238069 MOV -0x38(%RBP),%RDI |
0x23806d MOV 0xf0(%R15),%RCX |
0x238074 MOV 0xa8(%R15),%ESI |
0x23807b MOVSXD 0x2a0(%RDI),%RAX |
0x238082 VMOVSD (%RCX,%RAX,8),%XMM0 |
0x238087 VMOVSD %XMM0,-0x48(%RBP) |
0x23808c CALL 27c3b0 <_ZNK11qmcplusplus11ParticleSet14getDistTableABEi> |
0x238091 MOV 0x48(%RAX),%RAX |
0x238095 LEA (%RBX,%RBX,4),%RCX |
0x238099 MOV %RBX,-0x50(%RBP) |
0x23809d MOV 0x18(%RAX,%RCX,8),%R12 |
0x2380a2 MOV 0x98(%R15),%ECX |
0x2380a9 TEST %ECX,%ECX |
0x2380ab JLE 2387e0 |
0x2380b1 MOV 0x1c8(%R15),%RDX |
0x2380b8 MOV 0xa0(%R15),%R13 |
0x2380bf MOV 0x148(%R15),%RSI |
0x2380c6 VXORPD %XMM14,%XMM14,%XMM14 |
0x2380cb XOR %EDI,%EDI |
0x2380cd MOV %R12,-0x78(%RBP) |
0x2380d1 MOV %RCX,-0x70(%RBP) |
0x2380d5 MOV %RDX,-0x68(%RBP) |
0x2380d9 MOV %R13,-0x60(%RBP) |
0x2380dd JMP 2380f0 |
(229) 0x2380e0 VADDSD %XMM0,%XMM14,%XMM14 |
(229) 0x2380e4 INC %RDI |
(229) 0x2380e7 CMP %RCX,%RDI |
(229) 0x2380ea JE 238030 |
(229) 0x2380f0 MOV (%RDX,%RDI,8),%R9 |
(229) 0x2380f4 TEST %R9,%R9 |
(229) 0x2380f7 JE 2380e4 |
(229) 0x2380f9 MOV 0x268(%R13),%R8 |
(229) 0x238100 VXORPD %XMM0,%XMM0,%XMM0 |
(229) 0x238104 MOV 0x18(%R8),%R8 |
(229) 0x238108 MOV (%R8,%RDI,4),%R8 |
(229) 0x23810c MOV %R8,%R10 |
(229) 0x23810f SHR $0x20,%R10 |
(229) 0x238113 SUB %R8D,%R10D |
(229) 0x238116 JLE 2380e0 |
(229) 0x238118 VMOVSD 0x8(%R9),%XMM1 |
(229) 0x23811e MOVSXD %R8D,%R8 |
(229) 0x238121 MOV %R10D,%EBX |
(229) 0x238124 XOR %R14D,%R14D |
(229) 0x238127 XOR %R10D,%R10D |
(229) 0x23812a LEA (%R12,%R8,8),%R11 |
(229) 0x23812e NOT %R8 |
(229) 0x238131 JMP 238148 |
(232) 0x238140 INC %R14 |
(232) 0x238143 CMP %R14,%RBX |
(232) 0x238146 JE 238170 |
(232) 0x238148 CMP %R14,%R8 |
(232) 0x23814b JE 238140 |
(232) 0x23814d VMOVSD (%R11,%R14,8),%XMM2 |
(232) 0x238153 VUCOMISD %XMM1,%XMM2 |
(232) 0x238157 JAE 238140 |
(232) 0x238159 MOVSXD %R10D,%RAX |
(232) 0x23815c INC %R10D |
(232) 0x23815f VMOVSD %XMM2,(%RSI,%RAX,8) |
(232) 0x238164 JMP 238140 |
(229) 0x238170 TEST %R10D,%R10D |
(229) 0x238173 JLE 2380e0 |
(229) 0x238179 VMOVSD 0x238(%R9),%XMM15 |
(229) 0x238182 VMOVUPD 0x18(%R9),%XMM1 |
(229) 0x238188 VMOVSD 0x28(%R9),%XMM19 |
(229) 0x23818f VMOVSD 0x30(%R9),%XMM20 |
(229) 0x238196 VMOVSD 0x48(%R9),%XMM2 |
(229) 0x23819c VMOVSD 0x68(%R9),%XMM3 |
(229) 0x2381a2 VMOVSD 0x38(%R9),%XMM4 |
(229) 0x2381a8 VMOVSD 0x40(%R9),%XMM5 |
(229) 0x2381ae VMOVSD 0x50(%R9),%XMM8 |
(229) 0x2381b4 VMOVSD 0x58(%R9),%XMM6 |
(229) 0x2381ba VMOVSD 0x60(%R9),%XMM9 |
(229) 0x2381c0 VMOVSD 0x70(%R9),%XMM10 |
(229) 0x2381c6 VMOVSD 0x78(%R9),%XMM29 |
(229) 0x2381cd VMOVSD 0x80(%R9),%XMM30 |
(229) 0x2381d4 VMOVSD 0x88(%R9),%XMM31 |
(229) 0x2381db VMOVSD 0x90(%R9),%XMM16 |
(229) 0x2381e2 MOV 0x218(%R9),%R8 |
(229) 0x2381e9 MOV %R10D,%R9D |
(229) 0x2381ec CMP $0x8,%R10D |
(229) 0x2381f0 JAE 2381fe |
(229) 0x2381f2 VXORPD %XMM0,%XMM0,%XMM0 |
(229) 0x2381f6 XOR %R10D,%R10D |
(229) 0x2381f9 JMP 23871c |
(229) 0x2381fe VMOVAPD %XMM16,%XMM0 |
(229) 0x238204 MOV %R9,%R11 |
(229) 0x238207 SHR $0x3,%R11 |
(229) 0x23820b VMOVAPD %XMM20,-0x160(%RBP) |
(229) 0x238212 VMOVAPD %XMM29,-0x150(%RBP) |
(229) 0x238219 VMOVAPD %XMM30,-0x140(%RBP) |
(229) 0x238220 VMOVAPD %XMM31,-0x130(%RBP) |
(229) 0x238227 VBROADCASTSD %XMM15,%YMM16 |
(229) 0x23822d VBROADCASTSD %XMM1,%YMM17 |
(229) 0x238233 VPERMPD $0x55,%YMM1,%YMM18 |
(229) 0x23823a VBROADCASTSD %XMM19,%YMM7 |
(229) 0x238240 VBROADCASTSD %XMM20,%YMM20 |
(229) 0x238246 VBROADCASTSD %XMM4,%YMM21 |
(229) 0x23824c VBROADCASTSD %XMM5,%YMM22 |
(229) 0x238252 VBROADCASTSD %XMM2,%YMM23 |
(229) 0x238258 VMOVAPD %XMM8,-0xc0(%RBP) |
(229) 0x238260 VBROADCASTSD %XMM8,%YMM24 |
(229) 0x238266 VBROADCASTSD %XMM6,%YMM25 |
(229) 0x23826c VBROADCASTSD %XMM9,%YMM26 |
(229) 0x238272 VBROADCASTSD %XMM3,%YMM27 |
(229) 0x238278 VBROADCASTSD %XMM10,%YMM28 |
(229) 0x23827e VBROADCASTSD %XMM29,%YMM29 |
(229) 0x238284 VBROADCASTSD %XMM30,%YMM30 |
(229) 0x23828a VBROADCASTSD %XMM31,%YMM31 |
(229) 0x238290 VBROADCASTSD %XMM0,%YMM8 |
(229) 0x238295 MOV %R9D,%R10D |
(229) 0x238298 VMOVAPD %XMM2,-0x100(%RBP) |
(229) 0x2382a0 VMOVAPD %XMM3,-0xf0(%RBP) |
(229) 0x2382a8 AND $-0x8,%R10D |
(229) 0x2382ac VXORPD %XMM3,%XMM3,%XMM3 |
(229) 0x2382b0 VXORPD %XMM2,%XMM2,%XMM2 |
(229) 0x2382b4 VMOVSD %XMM14,-0x30(%RBP) |
(229) 0x2382b9 VMOVAPD %XMM15,-0x180(%RBP) |
(229) 0x2382c1 VMOVAPD %XMM19,-0x170(%RBP) |
(229) 0x2382c8 VMOVAPD %XMM4,-0x110(%RBP) |
(229) 0x2382d0 VMOVAPD %XMM5,-0xe0(%RBP) |
(229) 0x2382d8 VMOVAPD %XMM6,-0xd0(%RBP) |
(229) 0x2382e0 VMOVAPD %XMM9,-0xb0(%RBP) |
(229) 0x2382e8 VMOVAPD %XMM10,-0xa0(%RBP) |
(229) 0x2382f0 VMOVAPD %XMM0,-0x120(%RBP) |
(229) 0x2382f8 MOV %R9,-0x80(%RBP) |
(229) 0x2382fc XOR %R14D,%R14D |
(229) 0x2382ff SAL $0x6,%R11 |
(229) 0x238303 NOPW %CS:(%RAX,%RAX,1) |
(230) 0x238310 VMULPD (%RSI,%R14,1),%YMM16,%YMM9 |
(230) 0x238317 VMULPD 0x20(%RSI,%R14,1),%YMM16,%YMM11 |
(230) 0x23831f VMOVAPD %YMM7,%YMM19 |
(230) 0x238325 ADD $0x40,%R14 |
(230) 0x238329 VCVTTPD2DQ %YMM9,%XMM15 |
(230) 0x23832e VCVTTPD2DQ %YMM11,%XMM0 |
(230) 0x238333 VROUNDPD $0xb,%YMM9,%YMM10 |
(230) 0x238339 VROUNDPD $0xb,%YMM11,%YMM12 |
(230) 0x23833f VPMOVSXDQ %XMM15,%YMM4 |
(230) 0x238344 VPMOVSXDQ %XMM0,%YMM5 |
(230) 0x238349 VSUBPD %YMM10,%YMM9,%YMM10 |
(230) 0x23834e VSUBPD %YMM12,%YMM11,%YMM9 |
(230) 0x238353 VPEXTRQ $0x1,%XMM4,%RBX |
(230) 0x238359 VMOVQ %XMM4,%RDX |
(230) 0x23835e VEXTRACTI128 $0x1,%YMM4,%XMM4 |
(230) 0x238364 VMOVQ %XMM5,%RCX |
(230) 0x238369 VPEXTRQ $0x1,%XMM5,%RAX |
(230) 0x23836f VMOVQ %XMM4,%R12 |
(230) 0x238374 VPEXTRQ $0x1,%XMM4,%R13 |
(230) 0x23837a VEXTRACTI128 $0x1,%YMM5,%XMM4 |
(230) 0x238380 VMOVSD (%R8,%RDX,8),%XMM5 |
(230) 0x238386 VMOVSD (%R8,%RCX,8),%XMM6 |
(230) 0x23838c VMULPD %YMM10,%YMM10,%YMM14 |
(230) 0x238391 VFMADD213PD %YMM20,%YMM9,%YMM19 |
(230) 0x238397 VMULPD %YMM9,%YMM9,%YMM13 |
(230) 0x23839c VMOVQ %XMM4,%R15 |
(230) 0x2383a1 VPEXTRQ $0x1,%XMM4,%R9 |
(230) 0x2383a7 VMOVSD (%R8,%R12,8),%XMM4 |
(230) 0x2383ad VMOVHPD (%R8,%RBX,8),%XMM5,%XMM5 |
(230) 0x2383b3 VMOVHPD (%R8,%RAX,8),%XMM6,%XMM6 |
(230) 0x2383b9 VMOVHPD (%R8,%R13,8),%XMM4,%XMM4 |
(230) 0x2383bf VMULPD %YMM10,%YMM14,%YMM12 |
(230) 0x2383c4 VMULPD %YMM9,%YMM13,%YMM11 |
(230) 0x2383c9 VFMADD231PD %YMM18,%YMM13,%YMM19 |
(230) 0x2383cf VFMADD231PD %YMM17,%YMM11,%YMM19 |
(230) 0x2383d5 VINSERTF128 $0x1,%XMM4,%YMM5,%YMM4 |
(230) 0x2383db VMOVSD (%R8,%R15,8),%XMM5 |
(230) 0x2383e1 VMOVHPD (%R8,%R9,8),%XMM5,%XMM5 |
(230) 0x2383e7 VINSERTF128 $0x1,%XMM5,%YMM6,%YMM5 |
(230) 0x2383ed VMOVAPD %YMM7,%YMM6 |
(230) 0x2383f1 VFMADD213PD %YMM20,%YMM10,%YMM6 |
(230) 0x2383f7 VFMADD213PD %YMM2,%YMM5,%YMM19 |
(230) 0x2383fd VMOVAPD %YMM23,%YMM5 |
(230) 0x238403 VFMADD213PD %YMM24,%YMM9,%YMM5 |
(230) 0x238409 VFMADD231PD %YMM18,%YMM14,%YMM6 |
(230) 0x23840f VFMADD231PD %YMM22,%YMM13,%YMM5 |
(230) 0x238415 VFMADD231PD %YMM17,%YMM12,%YMM6 |
(230) 0x23841b VFMADD231PD %YMM21,%YMM11,%YMM5 |
(230) 0x238421 VFMADD213PD %YMM3,%YMM4,%YMM6 |
(230) 0x238426 VPCMPEQD %XMM3,%XMM3,%XMM3 |
(230) 0x23842a VPSUBD %XMM3,%XMM15,%XMM2 |
(230) 0x23842e VPSUBD %XMM3,%XMM0,%XMM3 |
(230) 0x238432 VPMOVSXDQ %XMM2,%YMM2 |
(230) 0x238437 VPMOVSXDQ %XMM3,%YMM3 |
(230) 0x23843c VMOVQ %XMM2,%RAX |
(230) 0x238441 VPEXTRQ $0x1,%XMM2,%RCX |
(230) 0x238447 VEXTRACTI128 $0x1,%YMM2,%XMM2 |
(230) 0x23844d VMOVQ %XMM3,%RBX |
(230) 0x238452 VPEXTRQ $0x1,%XMM3,%R15 |
(230) 0x238458 VPEXTRQ $0x1,%XMM2,%RDX |
(230) 0x23845e VMOVQ %XMM2,%R9 |
(230) 0x238463 VEXTRACTI128 $0x1,%YMM3,%XMM2 |
(230) 0x238469 VMOVSD (%R8,%RAX,8),%XMM3 |
(230) 0x23846f VMOVSD (%R8,%RBX,8),%XMM4 |
(230) 0x238475 VMOVQ %XMM2,%R12 |
(230) 0x23847a VPEXTRQ $0x1,%XMM2,%R13 |
(230) 0x238480 VMOVSD (%R8,%R9,8),%XMM2 |
(230) 0x238486 VMOVHPD (%R8,%RCX,8),%XMM3,%XMM3 |
(230) 0x23848c VMOVHPD (%R8,%R15,8),%XMM4,%XMM4 |
(230) 0x238492 VMOVHPD (%R8,%RDX,8),%XMM2,%XMM2 |
(230) 0x238498 VINSERTF128 $0x1,%XMM2,%YMM3,%YMM2 |
(230) 0x23849e VMOVSD (%R8,%R12,8),%XMM3 |
(230) 0x2384a4 VMOVHPD (%R8,%R13,8),%XMM3,%XMM3 |
(230) 0x2384aa VINSERTF128 $0x1,%XMM3,%YMM4,%YMM3 |
(230) 0x2384b0 VMOVAPD %YMM23,%YMM4 |
(230) 0x2384b6 VFMADD213PD %YMM24,%YMM10,%YMM4 |
(230) 0x2384bc VFMADD213PD %YMM19,%YMM3,%YMM5 |
(230) 0x2384c2 VMOVAPD %YMM27,%YMM19 |
(230) 0x2384c8 VFMADD213PD %YMM28,%YMM9,%YMM19 |
(230) 0x2384ce VFMADD213PD %YMM8,%YMM31,%YMM9 |
(230) 0x2384d4 VFMADD231PD %YMM22,%YMM14,%YMM4 |
(230) 0x2384da VFMADD231PD %YMM26,%YMM13,%YMM19 |
(230) 0x2384e0 VFMADD231PD %YMM13,%YMM30,%YMM9 |
(230) 0x2384e6 VFMADD231PD %YMM21,%YMM12,%YMM4 |
(230) 0x2384ec VFMADD231PD %YMM25,%YMM11,%YMM19 |
(230) 0x2384f2 VFMADD231PD %YMM11,%YMM29,%YMM9 |
(230) 0x2384f8 VFMADD213PD %YMM6,%YMM2,%YMM4 |
(230) 0x2384fd VPBROADCASTD -0x343a6(%RIP),%XMM2 |
(230) 0x238506 VPADDD %XMM2,%XMM15,%XMM3 |
(230) 0x23850a VPADDD %XMM2,%XMM0,%XMM2 |
(230) 0x23850e VPMOVSXDQ %XMM3,%YMM3 |
(230) 0x238513 VPMOVSXDQ %XMM2,%YMM2 |
(230) 0x238518 VMOVQ %XMM3,%RAX |
(230) 0x23851d VPEXTRQ $0x1,%XMM3,%RCX |
(230) 0x238523 VEXTRACTI128 $0x1,%YMM3,%XMM3 |
(230) 0x238529 VMOVQ %XMM2,%R15 |
(230) 0x23852e VPEXTRQ $0x1,%XMM2,%RBX |
(230) 0x238534 VEXTRACTI128 $0x1,%YMM2,%XMM2 |
(230) 0x23853a VMOVQ %XMM3,%RDX |
(230) 0x23853f VPEXTRQ $0x1,%XMM3,%R9 |
(230) 0x238545 VMOVSD (%R8,%RAX,8),%XMM3 |
(230) 0x23854b VMOVQ %XMM2,%R12 |
(230) 0x238550 VPEXTRQ $0x1,%XMM2,%R13 |
(230) 0x238556 VMOVSD (%R8,%R15,8),%XMM6 |
(230) 0x23855c VMOVSD (%R8,%RDX,8),%XMM2 |
(230) 0x238562 VMOVHPD (%R8,%RCX,8),%XMM3,%XMM3 |
(230) 0x238568 VMOVHPD (%R8,%RBX,8),%XMM6,%XMM6 |
(230) 0x23856e VMOVHPD (%R8,%R9,8),%XMM2,%XMM2 |
(230) 0x238574 VINSERTF128 $0x1,%XMM2,%YMM3,%YMM2 |
(230) 0x23857a VMOVSD (%R8,%R12,8),%XMM3 |
(230) 0x238580 VMOVHPD (%R8,%R13,8),%XMM3,%XMM3 |
(230) 0x238586 VINSERTF128 $0x1,%XMM3,%YMM6,%YMM3 |
(230) 0x23858c VMOVAPD %YMM27,%YMM6 |
(230) 0x238592 VFMADD213PD %YMM28,%YMM10,%YMM6 |
(230) 0x238598 VFMADD213PD %YMM8,%YMM31,%YMM10 |
(230) 0x23859e VFMADD213PD %YMM5,%YMM3,%YMM19 |
(230) 0x2385a4 VFMADD231PD %YMM26,%YMM14,%YMM6 |
(230) 0x2385aa VFMADD231PD %YMM14,%YMM30,%YMM10 |
(230) 0x2385b0 VFMADD231PD %YMM25,%YMM12,%YMM6 |
(230) 0x2385b6 VFMADD231PD %YMM12,%YMM29,%YMM10 |
(230) 0x2385bc VFMADD213PD %YMM4,%YMM2,%YMM6 |
(230) 0x2385c1 VPBROADCASTD -0x34482(%RIP),%XMM2 |
(230) 0x2385ca VPADDD %XMM2,%XMM15,%XMM3 |
(230) 0x2385ce VPADDD %XMM2,%XMM0,%XMM0 |
(230) 0x2385d2 VPMOVSXDQ %XMM3,%YMM2 |
(230) 0x2385d7 VPMOVSXDQ %XMM0,%YMM0 |
(230) 0x2385dc VMOVQ %XMM2,%RAX |
(230) 0x2385e1 VPEXTRQ $0x1,%XMM2,%RCX |
(230) 0x2385e7 VEXTRACTI128 $0x1,%YMM2,%XMM2 |
(230) 0x2385ed VMOVQ %XMM0,%RBX |
(230) 0x2385f2 VPEXTRQ $0x1,%XMM0,%R15 |
(230) 0x2385f8 VEXTRACTI128 $0x1,%YMM0,%XMM0 |
(230) 0x2385fe VMOVQ %XMM2,%R9 |
(230) 0x238603 VPEXTRQ $0x1,%XMM2,%RDX |
(230) 0x238609 VMOVSD (%R8,%RAX,8),%XMM2 |
(230) 0x23860f VMOVQ %XMM0,%R12 |
(230) 0x238614 VPEXTRQ $0x1,%XMM0,%R13 |
(230) 0x23861a VMOVSD (%R8,%RBX,8),%XMM3 |
(230) 0x238620 VMOVSD (%R8,%R9,8),%XMM0 |
(230) 0x238626 VMOVHPD (%R8,%RCX,8),%XMM2,%XMM2 |
(230) 0x23862c VMOVHPD (%R8,%R15,8),%XMM3,%XMM3 |
(230) 0x238632 VMOVHPD (%R8,%RDX,8),%XMM0,%XMM0 |
(230) 0x238638 VINSERTF128 $0x1,%XMM0,%YMM2,%YMM0 |
(230) 0x23863e VMOVSD (%R8,%R12,8),%XMM2 |
(230) 0x238644 VMOVHPD (%R8,%R13,8),%XMM2,%XMM2 |
(230) 0x23864a VINSERTF128 $0x1,%XMM2,%YMM3,%YMM4 |
(230) 0x238650 VMOVAPD %YMM10,%YMM3 |
(230) 0x238654 VMOVAPD %YMM9,%YMM2 |
(230) 0x238658 VFMADD213PD %YMM6,%YMM0,%YMM3 |
(230) 0x23865d VFMADD213PD %YMM19,%YMM4,%YMM2 |
(230) 0x238663 CMP %R14,%R11 |
(230) 0x238666 JNE 238310 |
(229) 0x23866c VADDPD %YMM3,%YMM2,%YMM0 |
(229) 0x238670 VMOVSD -0x30(%RBP),%XMM14 |
(229) 0x238675 VMOVAPD -0x180(%RBP),%XMM15 |
(229) 0x23867d VMOVAPD -0x170(%RBP),%XMM19 |
(229) 0x238684 VMOVAPD -0x160(%RBP),%XMM20 |
(229) 0x23868b VMOVAPD -0x150(%RBP),%XMM29 |
(229) 0x238692 VMOVAPD -0x140(%RBP),%XMM30 |
(229) 0x238699 VMOVAPD -0x130(%RBP),%XMM31 |
(229) 0x2386a0 VMOVAPD -0x120(%RBP),%XMM16 |
(229) 0x2386a7 VMOVAPD -0x110(%RBP),%XMM4 |
(229) 0x2386af VMOVAPD -0xf0(%RBP),%XMM3 |
(229) 0x2386b7 VMOVAPD -0xe0(%RBP),%XMM5 |
(229) 0x2386bf VMOVAPD -0xd0(%RBP),%XMM6 |
(229) 0x2386c7 VMOVAPD -0xc0(%RBP),%XMM8 |
(229) 0x2386cf VMOVAPD -0xb0(%RBP),%XMM9 |
(229) 0x2386d7 VMOVAPD -0xa0(%RBP),%XMM10 |
(229) 0x2386df MOV -0x80(%RBP),%R9 |
(229) 0x2386e3 MOV -0x58(%RBP),%R15 |
(229) 0x2386e7 MOV -0x78(%RBP),%R12 |
(229) 0x2386eb MOV -0x70(%RBP),%RCX |
(229) 0x2386ef MOV -0x68(%RBP),%RDX |
(229) 0x2386f3 MOV -0x60(%RBP),%R13 |
(229) 0x2386f7 VEXTRACTF128 $0x1,%YMM0,%XMM2 |
(229) 0x2386fd VADDPD %XMM2,%XMM0,%XMM0 |
(229) 0x238701 VPERMILPD $0x1,%XMM0,%XMM2 |
(229) 0x238707 VADDSD %XMM2,%XMM0,%XMM0 |
(229) 0x23870b VMOVAPD -0x100(%RBP),%XMM2 |
(229) 0x238713 CMP %R9,%R10 |
(229) 0x238716 JE 2380e0 |
(229) 0x23871c VUNPCKLPD %XMM3,%XMM2,%XMM2 |
(229) 0x238720 VUNPCKLPD %XMM10,%XMM8,%XMM3 |
(229) 0x238725 VUNPCKLPD %XMM9,%XMM5,%XMM8 |
(229) 0x23872a VUNPCKLPD %XMM6,%XMM4,%XMM9 |
(229) 0x23872e XCHG %AX,%AX |
(231) 0x238730 VMULSD (%RSI,%R10,8),%XMM15,%XMM4 |
(231) 0x238736 VMOVAPD %XMM19,%XMM12 |
(231) 0x23873c INC %R10 |
(231) 0x23873f VROUNDSD $0xb,%XMM4,%XMM4,%XMM5 |
(231) 0x238745 VCVTTSD2SI %XMM4,%EAX |
(231) 0x238749 MOVSXD %EAX,%R11 |
(231) 0x23874c VSUBSD %XMM5,%XMM4,%XMM4 |
(231) 0x238750 VMULSD %XMM4,%XMM4,%XMM5 |
(231) 0x238754 VFMADD213SD %XMM20,%XMM4,%XMM12 |
(231) 0x23875a VMULSD %XMM4,%XMM5,%XMM6 |
(231) 0x23875e VMOVDDUP %XMM5,%XMM10 |
(231) 0x238762 VMOVDDUP %XMM6,%XMM7 |
(231) 0x238766 VUNPCKLPD %XMM10,%XMM7,%XMM11 |
(231) 0x23876b VMULPD %XMM1,%XMM11,%XMM11 |
(231) 0x23876f VPERMILPD $0x1,%XMM11,%XMM13 |
(231) 0x238775 VADDSD %XMM11,%XMM12,%XMM11 |
(231) 0x23877a VADDSD %XMM13,%XMM11,%XMM11 |
(231) 0x23877f VMOVDDUP %XMM4,%XMM13 |
(231) 0x238783 VFMADD213SD %XMM16,%XMM31,%XMM4 |
(231) 0x238789 VFMADD132SD (%R8,%R11,8),%XMM0,%XMM11 |
(231) 0x23878f VFMADD213PD %XMM3,%XMM2,%XMM13 |
(231) 0x238794 VFMADD231SD %XMM5,%XMM30,%XMM4 |
(231) 0x23879a VFMADD231PD %XMM10,%XMM8,%XMM13 |
(231) 0x23879f VFMADD231SD %XMM6,%XMM29,%XMM4 |
(231) 0x2387a5 VFMADD231PD %XMM7,%XMM9,%XMM13 |
(231) 0x2387aa VMULPD 0x8(%R8,%R11,8),%XMM13,%XMM7 |
(231) 0x2387b1 VPERMILPD $0x1,%XMM7,%XMM0 |
(231) 0x2387b7 VADDSD %XMM0,%XMM7,%XMM0 |
(231) 0x2387bb VADDSD %XMM0,%XMM11,%XMM0 |
(231) 0x2387bf VFMADD231SD 0x18(%R8,%R11,8),%XMM4,%XMM0 |
(231) 0x2387c6 CMP %R10,%R9 |
(231) 0x2387c9 JNE 238730 |
(229) 0x2387cf JMP 2380e0 |
0x2387e0 MOV 0x90(%R15),%EAX |
0x2387e7 VXORPD %XMM14,%XMM14,%XMM14 |
0x2387ec TEST %EAX,%EAX |
0x2387ee JLE 238030 |
0x2387f4 XOR %R14D,%R14D |
0x2387f7 JMP 238813 |
(228) 0x238800 VADDSD %XMM0,%XMM14,%XMM14 |
(228) 0x238804 INC %R14 |
(228) 0x238807 MOVSXD %EAX,%RCX |
(228) 0x23880a CMP %RCX,%R14 |
(228) 0x23880d JGE 238030 |
(228) 0x238813 MOV 0xa0(%R15),%RCX |
(228) 0x23881a MOV 0x1c8(%R15),%RDX |
(228) 0x238821 MOV 0x18(%RCX),%RCX |
(228) 0x238825 MOVSXD (%RCX,%R14,4),%RCX |
(228) 0x238829 MOV (%RDX,%RCX,8),%RBX |
(228) 0x23882d TEST %RBX,%RBX |
(228) 0x238830 JE 238804 |
(228) 0x238832 VMOVSD (%R12,%R14,8),%XMM1 |
(228) 0x238838 VMOVSD 0x8(%RBX),%XMM2 |
(228) 0x23883d VXORPD %XMM0,%XMM0,%XMM0 |
(228) 0x238841 VUCOMISD %XMM1,%XMM2 |
(228) 0x238845 JBE 238800 |
(228) 0x238847 VMULSD 0x238(%RBX),%XMM1,%XMM0 |
(228) 0x23884f LEA -0x88(%RBP),%RDI |
(228) 0x238856 VMOVSD %XMM14,-0x30(%RBP) |
(228) 0x23885b CALL 2f6680 <@plt_start@+0x370> |
(228) 0x238860 VMOVSD 0x18(%RBX),%XMM3 |
(228) 0x238865 VMOVSD 0x30(%RBX),%XMM6 |
(228) 0x23886a VMULSD %XMM0,%XMM0,%XMM1 |
(228) 0x23886e VMOVSD 0x20(%RBX),%XMM4 |
(228) 0x238873 VCVTTSD2SI -0x88(%RBP),%EAX |
(228) 0x23887b VMOVSD 0x28(%RBX),%XMM5 |
(228) 0x238880 MOV 0x218(%RBX),%RCX |
(228) 0x238887 VMOVSD -0x30(%RBP),%XMM14 |
(228) 0x23888c VMOVHPD 0x40(%RBX),%XMM3,%XMM3 |
(228) 0x238891 VMOVHPD 0x50(%RBX),%XMM6,%XMM6 |
(228) 0x238896 VMOVHPD 0x38(%RBX),%XMM4,%XMM4 |
(228) 0x23889b VMULSD %XMM0,%XMM1,%XMM2 |
(228) 0x23889f VMOVDDUP %XMM0,%XMM0 |
(228) 0x2388a3 VUNPCKLPD %XMM1,%XMM2,%XMM8 |
(228) 0x2388a7 VUNPCKLPD %XMM2,%XMM1,%XMM7 |
(228) 0x2388ab CLTQ |
(228) 0x2388ad VFMADD231PD %XMM8,%XMM3,%XMM6 |
(228) 0x2388b2 VMOVHPD 0x48(%RBX),%XMM5,%XMM3 |
(228) 0x2388b7 VFMADD231PD %XMM7,%XMM4,%XMM6 |
(228) 0x2388bc VFMADD213PD %XMM6,%XMM0,%XMM3 |
(228) 0x2388c1 VMULPD (%RCX,%RAX,8),%XMM3,%XMM3 |
(228) 0x2388c6 VUNPCKLPD %XMM0,%XMM1,%XMM0 |
(228) 0x2388ca VMULPD 0x60(%RBX),%XMM0,%XMM1 |
(228) 0x2388cf VMULPD 0x80(%RBX),%XMM0,%XMM0 |
(228) 0x2388d7 VPERMILPD $0x1,%XMM3,%XMM4 |
(228) 0x2388dd VADDSD %XMM3,%XMM4,%XMM3 |
(228) 0x2388e1 VPERMILPD $0x1,%XMM1,%XMM4 |
(228) 0x2388e7 VFMADD231SD 0x58(%RBX),%XMM2,%XMM1 |
(228) 0x2388ed VADDSD %XMM4,%XMM1,%XMM1 |
(228) 0x2388f1 VADDSD 0x70(%RBX),%XMM1,%XMM1 |
(228) 0x2388f6 VFMADD132SD 0x10(%RCX,%RAX,8),%XMM3,%XMM1 |
(228) 0x2388fd VPERMILPD $0x1,%XMM0,%XMM3 |
(228) 0x238903 VFMADD231SD 0x78(%RBX),%XMM2,%XMM0 |
(228) 0x238909 VADDSD %XMM3,%XMM0,%XMM0 |
(228) 0x23890d VADDSD 0x90(%RBX),%XMM0,%XMM0 |
(228) 0x238915 VFMADD132SD 0x18(%RCX,%RAX,8),%XMM1,%XMM0 |
(228) 0x23891c MOV 0x90(%R15),%EAX |
(228) 0x238923 JMP 238800 |
/usr/lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/shared_ptr_base.h: 1296 - 1296 |
-------------------------------------------------------------------------------- |
1296: { return _M_ptr; } |
/home/kcamus/qaas_runs/170-254-9426/intel/miniqmc/build/miniqmc/src/QMCWaveFunctions/Jastrow/BsplineFunctor.h: 166 - 260 |
-------------------------------------------------------------------------------- |
166: if (r >= cutoff_radius) |
167: return 0.0; |
168: r *= DeltaRInv; |
169: real_type ipart, t; |
170: t = std::modf(r, &ipart); |
171: int i = (int)ipart; |
172: real_type tp[4]; |
173: tp[0] = t * t * t; |
[...] |
179: (SplineCoefs[i+0]*(A[ 0]*tp[0] + A[ 1]*tp[1] + A[ 2]*tp[2] + A[ 3]*tp[3])+ |
180: SplineCoefs[i+1]*(A[ 4]*tp[0] + A[ 5]*tp[1] + A[ 6]*tp[2] + A[ 7]*tp[3])+ |
181: SplineCoefs[i+2]*(A[ 8]*tp[0] + A[ 9]*tp[1] + A[10]*tp[2] + A[11]*tp[3])+ |
182: SplineCoefs[i+3]*(A[12]*tp[0] + A[13]*tp[1] + A[14]*tp[2] + A[15]*tp[3])); |
[...] |
233: const int iLimit = iEnd - iStart; |
234: |
235: #pragma vector always |
236: for (int jat = 0; jat < iLimit; jat++) |
237: { |
238: real_type r = distArray[jat]; |
239: // pick the distances smaller than the cutoff and avoid the reference atom |
240: if (r < cutoff_radius && iStart + jat != iat) |
241: distArrayCompressed[iCount++] = distArray[jat]; |
242: } |
243: |
244: real_type d = 0.0; |
245: //#pragma omp simd reduction(+:d) |
246: for (int jat = 0; jat < iCount; jat++) |
247: { |
248: real_type r = distArrayCompressed[jat]; |
249: r *= DeltaRInv; |
250: int i = (int)r; |
251: real_type t = r - real_type(i); |
252: real_type tp0 = t * t * t; |
253: real_type tp1 = t * t; |
254: real_type tp2 = t; |
255: |
256: real_type d1 = SplineCoefs[i + 0] * (A[0] * tp0 + A[1] * tp1 + A[2] * tp2 + A[3]); |
257: real_type d2 = SplineCoefs[i + 1] * (A[4] * tp0 + A[5] * tp1 + A[6] * tp2 + A[7]); |
258: real_type d3 = SplineCoefs[i + 2] * (A[8] * tp0 + A[9] * tp1 + A[10] * tp2 + A[11]); |
259: real_type d4 = SplineCoefs[i + 3] * (A[12] * tp0 + A[13] * tp1 + A[14] * tp2 + A[15]); |
260: d += (d1 + d2 + d3 + d4); |
/home/kcamus/qaas_runs/170-254-9426/intel/miniqmc/build/miniqmc/src/Particle/ParticleSet.h: 313 - 313 |
-------------------------------------------------------------------------------- |
313: inline int first(int igroup) const { return (*group_offsets_)[igroup]; } |
/usr/lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/stl_vector.h: 919 - 1064 |
-------------------------------------------------------------------------------- |
919: { return size_type(this->_M_impl._M_finish - this->_M_impl._M_start); } |
[...] |
1046: return *(this->_M_impl._M_start + __n); |
[...] |
1064: return *(this->_M_impl._M_start + __n); |
/home/kcamus/qaas_runs/170-254-9426/intel/miniqmc/build/miniqmc/src/Numerics/OhmmsPETE/OhmmsVector.h: 223 - 249 |
-------------------------------------------------------------------------------- |
223: return X[i]; |
[...] |
229: return X[i]; |
[...] |
249: inline const_pointer data() const { return X; } |
/home/kcamus/qaas_runs/170-254-9426/intel/miniqmc/build/miniqmc/src/QMCWaveFunctions/Jastrow/OneBodyJastrowRef.h: 134 - 155 |
-------------------------------------------------------------------------------- |
134: for (int k = 0; k < ratios.size(); ++k) |
135: ratios[k] = std::exp(Vat[VP.refPtcl] - computeU(VP.getDistTableAB(myTableID).getDistRow(k).data())); |
[...] |
141: if (NumGroups > 0) |
142: { |
143: for (int jg = 0; jg < NumGroups; ++jg) |
144: { |
145: if (F[jg] != nullptr) |
146: curVat += F[jg]->evaluateV(-1, Ions.first(jg), Ions.last(jg), dist, DistCompressed.data()); |
147: } |
148: } |
149: else |
150: { |
151: for (int c = 0; c < Nions; ++c) |
152: { |
153: int gid = Ions.GroupID[c]; |
154: if (F[gid] != nullptr) |
155: curVat += F[gid]->evaluate(dist[c]); |
Path / |
Metric | Value |
---|---|
CQA speedup if no scalar integer | 4.00 |
CQA speedup if FP arith vectorized | 1.58 |
CQA speedup if fully vectorized | 5.33 |
CQA speedup if no inter-iteration dependency | NA |
CQA speedup if next bottleneck killed | 1.13 |
Bottlenecks | P5, P6, P7 |
Function | miniqmcreference::OneBodyJastrowRef |
Source | stl_vector.h:919-919,stl_vector.h:1046-1046,stl_vector.h:1064-1064,OhmmsVector.h:223-223,OhmmsVector.h:249-249,OneBodyJastrowRef.h:134-135,OneBodyJastrowRef.h:141-141,OneBodyJastrowRef.h:151-151 |
Source loop unroll info | NA |
Source loop unroll confidence level | NA |
Unroll/vectorization loop type | NA |
Unroll factor | NA |
CQA cycles | 8.67 |
CQA cycles if no scalar integer | 2.17 |
CQA cycles if FP arith vectorized | 5.50 |
CQA cycles if fully vectorized | 1.63 |
Front-end cycles | 7.67 |
DIV/SQRT cycles | 0.00 |
P0 cycles | 3.50 |
P1 cycles | 2.75 |
P2 cycles | 2.75 |
P3 cycles | 2.50 |
P4 cycles | 3.50 |
P5 cycles | 8.67 |
P6 cycles | 8.67 |
P7 cycles | 8.67 |
P8 cycles | 0.00 |
P9 cycles | 0.00 |
P10 cycles | 0.50 |
P11 cycles | 0.50 |
P12 cycles | 1.00 |
P13 cycles | 1.00 |
Inter-iter dependencies cycles | NA |
FE+BE cycles (UFS) | NA |
Stall cycles (UFS) | NA |
Nb insns | 44.00 |
Nb uops | 46.00 |
Nb loads | 17.00 |
Nb stores | 7.00 |
Nb stack references | 8.00 |
FLOP/cycle | 0.12 |
Nb FLOP add-sub | 1.00 |
Nb FLOP mul | 0.00 |
Nb FLOP fma | 0.00 |
Nb FLOP div | 0.00 |
Nb FLOP rcp | 0.00 |
Nb FLOP sqrt | 0.00 |
Nb FLOP rsqrt | 0.00 |
Bytes/cycle | 20.31 |
Bytes prefetched | 0.00 |
Bytes loaded | 120.00 |
Bytes stored | 56.00 |
Stride 0 | NA |
Stride 1 | NA |
Stride n | NA |
Stride unknown | NA |
Stride indirect | NA |
Vectorization ratio all | 14.29 |
Vectorization ratio load | 0.00 |
Vectorization ratio store | 0.00 |
Vectorization ratio mul | NA |
Vectorization ratio add_sub | 0.00 |
Vectorization ratio fma | NA |
Vectorization ratio div_sqrt | NA |
Vectorization ratio other | 50.00 |
Vector-efficiency ratio all | 13.39 |
Vector-efficiency ratio load | 11.61 |
Vector-efficiency ratio store | 12.50 |
Vector-efficiency ratio mul | NA |
Vector-efficiency ratio add_sub | 12.50 |
Vector-efficiency ratio fma | NA |
Vector-efficiency ratio div_sqrt | NA |
Vector-efficiency ratio other | 16.67 |
Metric | Value |
---|---|
CQA speedup if no scalar integer | 4.00 |
CQA speedup if FP arith vectorized | 1.58 |
CQA speedup if fully vectorized | 5.33 |
CQA speedup if no inter-iteration dependency | NA |
CQA speedup if next bottleneck killed | 1.13 |
Bottlenecks | P5, P6, P7 |
Function | miniqmcreference::OneBodyJastrowRef |
Source | stl_vector.h:919-919,stl_vector.h:1046-1046,stl_vector.h:1064-1064,OhmmsVector.h:223-223,OhmmsVector.h:249-249,OneBodyJastrowRef.h:134-135,OneBodyJastrowRef.h:141-141,OneBodyJastrowRef.h:151-151 |
Source loop unroll info | NA |
Source loop unroll confidence level | NA |
Unroll/vectorization loop type | NA |
Unroll factor | NA |
CQA cycles | 8.67 |
CQA cycles if no scalar integer | 2.17 |
CQA cycles if FP arith vectorized | 5.50 |
CQA cycles if fully vectorized | 1.63 |
Front-end cycles | 7.67 |
DIV/SQRT cycles | 0.00 |
P0 cycles | 3.50 |
P1 cycles | 2.75 |
P2 cycles | 2.75 |
P3 cycles | 2.50 |
P4 cycles | 3.50 |
P5 cycles | 8.67 |
P6 cycles | 8.67 |
P7 cycles | 8.67 |
P8 cycles | 0.00 |
P9 cycles | 0.00 |
P10 cycles | 0.50 |
P11 cycles | 0.50 |
P12 cycles | 1.00 |
P13 cycles | 1.00 |
Inter-iter dependencies cycles | NA |
FE+BE cycles (UFS) | NA |
Stall cycles (UFS) | NA |
Nb insns | 44.00 |
Nb uops | 46.00 |
Nb loads | 17.00 |
Nb stores | 7.00 |
Nb stack references | 8.00 |
FLOP/cycle | 0.12 |
Nb FLOP add-sub | 1.00 |
Nb FLOP mul | 0.00 |
Nb FLOP fma | 0.00 |
Nb FLOP div | 0.00 |
Nb FLOP rcp | 0.00 |
Nb FLOP sqrt | 0.00 |
Nb FLOP rsqrt | 0.00 |
Bytes/cycle | 20.31 |
Bytes prefetched | 0.00 |
Bytes loaded | 120.00 |
Bytes stored | 56.00 |
Stride 0 | NA |
Stride 1 | NA |
Stride n | NA |
Stride unknown | NA |
Stride indirect | NA |
Vectorization ratio all | 14.29 |
Vectorization ratio load | 0.00 |
Vectorization ratio store | 0.00 |
Vectorization ratio mul | NA |
Vectorization ratio add_sub | 0.00 |
Vectorization ratio fma | NA |
Vectorization ratio div_sqrt | NA |
Vectorization ratio other | 50.00 |
Vector-efficiency ratio all | 13.39 |
Vector-efficiency ratio load | 11.61 |
Vector-efficiency ratio store | 12.50 |
Vector-efficiency ratio mul | NA |
Vector-efficiency ratio add_sub | 12.50 |
Vector-efficiency ratio fma | NA |
Vector-efficiency ratio div_sqrt | NA |
Vector-efficiency ratio other | 16.67 |
Path / |
Function | miniqmcreference::OneBodyJastrowRef |
Source file and lines | OneBodyJastrowRef.h:134-155 |
Module | exec |
nb instructions | 44 |
nb uops | 46 |
loop length | 200 |
used x86 registers | 11 |
used mmx registers | 0 |
used xmm registers | 2 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 8 |
micro-operation queue | 7.67 cycles |
front end | 7.67 cycles |
 | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 3.50 | 2.75 | 2.75 | 2.50 | 3.50 | 8.67 | 8.67 | 8.67 | 0.00 | 0.00 | 0.50 | 0.50 | 1.00 | 1.00 |
cycles | 3.50 | 2.75 | 2.75 | 2.50 | 3.50 | 8.67 | 8.67 | 8.67 | 0.00 | 0.00 | 0.50 | 0.50 | 1.00 | 1.00 |
Cycles executing div or sqrt instructions | NA |
Front-end | 7.67 |
Dispatch | 8.67 |
Overall L1 | 8.67 |
Vectorization ratios - INT
all | 7% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 25% |
Vectorization ratios - FP
all | 28% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 100% |
Vectorization ratios - INT+FP
all | 14% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 50% |
Vector-efficiency ratios - INT
all | 12% |
load | 11% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 12% |
Vector-efficiency ratios - FP
all | 16% |
load | 12% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 25% |
Vector-efficiency ratios - INT+FP
all | 13% |
load | 11% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 16% |
Instruction | Nb FU | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
VMOVSD -0x48(%RBP),%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VSUBSD %XMM14,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 |
VZEROUPPER | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
CALL 2f6670 <@plt_start@+0x360> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0x40(%RBP),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV -0x50(%RBP),%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV (%RCX),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
VMOVSD %XMM0,(%RAX,%RBX,8) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 |
INC %RBX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV 0x8(%RCX),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
SUB %RAX,%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SAR $0x3,%RCX | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
CMP %RBX,%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JBE 238928 <_ZN16miniqmcreference17OneBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE14evaluateRatiosERNS1_18VirtualParticleSetERSt6vectorIdSaIdEE+0x938> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV -0x38(%RBP),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0xf0(%R15),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0xa8(%R15),%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOVSXD 0x2a0(%RDI),%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VMOVSD (%RCX,%RAX,8),%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVSD %XMM0,-0x48(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 |
CALL 27c3b0 <_ZNK11qmcplusplus11ParticleSet14getDistTableABEi> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV 0x48(%RAX),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
LEA (%RBX,%RBX,4),%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %RBX,-0x50(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV 0x18(%RAX,%RCX,8),%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x98(%R15),%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
TEST %ECX,%ECX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JLE 2387e0 <_ZN16miniqmcreference17OneBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE14evaluateRatiosERNS1_18VirtualParticleSetERSt6vectorIdSaIdEE+0x7f0> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV 0x1c8(%R15),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0xa0(%R15),%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x148(%R15),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
VXORPD %XMM14,%XMM14,%XMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %EDI,%EDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV %R12,-0x78(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %RCX,-0x70(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %RDX,-0x68(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %R13,-0x60(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
JMP 2380f0 <_ZN16miniqmcreference17OneBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE14evaluateRatiosERNS1_18VirtualParticleSetERSt6vectorIdSaIdEE+0x100> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
MOV 0x90(%R15),%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
VXORPD %XMM14,%XMM14,%XMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
TEST %EAX,%EAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JLE 238030 <_ZN16miniqmcreference17OneBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE14evaluateRatiosERNS1_18VirtualParticleSetERSt6vectorIdSaIdEE+0x40> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
XOR %R14D,%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
JMP 238813 <_ZN16miniqmcreference17OneBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE14evaluateRatiosERNS1_18VirtualParticleSetERSt6vectorIdSaIdEE+0x823> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
Function | miniqmcreference::OneBodyJastrowRef |
Source file and lines | OneBodyJastrowRef.h:134-155 |
Module | exec |
nb instructions | 44 |
nb uops | 46 |
loop length | 200 |
used x86 registers | 11 |
used mmx registers | 0 |
used xmm registers | 2 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 8 |
micro-operation queue | 7.67 cycles |
front end | 7.67 cycles |
ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 3.50 | 2.75 | 2.75 | 2.50 | 3.50 | 8.67 | 8.67 | 8.67 | 0.00 | 0.00 | 0.50 | 0.50 | 1.00 | 1.00 |
cycles | 3.50 | 2.75 | 2.75 | 2.50 | 3.50 | 8.67 | 8.67 | 8.67 | 0.00 | 0.00 | 0.50 | 0.50 | 1.00 | 1.00 |
Cycles executing div or sqrt instructions | NA |
Front-end | 7.67 |
Dispatch | 8.67 |
Overall L1 | 8.67 |
all | 7% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 25% |
all | 28% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 100% |
all | 14% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 50% |
all | 12% |
load | 11% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 12% |
all | 16% |
load | 12% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 25% |
all | 13% |
load | 11% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 16% |
Instruction | Nb FU | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
VMOVSD -0x48(%RBP),%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VSUBSD %XMM14,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 |
VZEROUPPER | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
CALL 2f6670 <@plt_start@+0x360> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0x40(%RBP),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV -0x50(%RBP),%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV (%RCX),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
VMOVSD %XMM0,(%RAX,%RBX,8) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 |
INC %RBX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV 0x8(%RCX),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
SUB %RAX,%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SAR $0x3,%RCX | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
CMP %RBX,%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JBE 238928 <_ZN16miniqmcreference17OneBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE14evaluateRatiosERNS1_18VirtualParticleSetERSt6vectorIdSaIdEE+0x938> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV -0x38(%RBP),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0xf0(%R15),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0xa8(%R15),%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOVSXD 0x2a0(%RDI),%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VMOVSD (%RCX,%RAX,8),%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVSD %XMM0,-0x48(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 |
CALL 27c3b0 <_ZNK11qmcplusplus11ParticleSet14getDistTableABEi> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV 0x48(%RAX),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
LEA (%RBX,%RBX,4),%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %RBX,-0x50(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV 0x18(%RAX,%RCX,8),%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x98(%R15),%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
TEST %ECX,%ECX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JLE 2387e0 <_ZN16miniqmcreference17OneBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE14evaluateRatiosERNS1_18VirtualParticleSetERSt6vectorIdSaIdEE+0x7f0> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV 0x1c8(%R15),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0xa0(%R15),%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x148(%R15),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
VXORPD %XMM14,%XMM14,%XMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %EDI,%EDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV %R12,-0x78(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %RCX,-0x70(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %RDX,-0x68(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %R13,-0x60(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
JMP 2380f0 <_ZN16miniqmcreference17OneBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE14evaluateRatiosERNS1_18VirtualParticleSetERSt6vectorIdSaIdEE+0x100> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
MOV 0x90(%R15),%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
VXORPD %XMM14,%XMM14,%XMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
TEST %EAX,%EAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JLE 238030 <_ZN16miniqmcreference17OneBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE14evaluateRatiosERNS1_18VirtualParticleSetERSt6vectorIdSaIdEE+0x40> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
XOR %R14D,%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
JMP 238813 <_ZN16miniqmcreference17OneBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE14evaluateRatiosERNS1_18VirtualParticleSetERSt6vectorIdSaIdEE+0x823> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |