Loop Id: 323 | Module: exec | Source: BsplineFunctor.h:236-260 [...] | Coverage: 0.03% |
---|
Loop Id: 323 | Module: exec | Source: BsplineFunctor.h:236-260 [...] | Coverage: 0.03% |
---|
0x23ef70 VADDSD %XMM0,%XMM14,%XMM14 |
0x23ef74 CMP %RDI,%R14 |
0x23ef77 JE 23ee75 |
0x23ef7d MOV %R14,%RAX |
0x23ef80 MOV %ECX,%EDX |
0x23ef82 MOV 0x4(%R13,%RAX,4),%ECX |
0x23ef87 INC %R14 |
0x23ef8a VXORPD %XMM0,%XMM0,%XMM0 |
0x23ef8e MOV %ECX,%EBX |
0x23ef90 SUB %EDX,%EBX |
0x23ef92 JLE 23ef70 |
0x23ef94 ADD %R9,%RAX |
0x23ef97 MOVSXD %EDX,%RDX |
0x23ef9a MOV %R12,%R15 |
0x23ef9d MOV %R14,-0x38(%RBP) |
0x23efa1 MOV %EBX,%R14D |
0x23efa4 MOV %ECX,-0x2c(%RBP) |
0x23efa7 XOR %EBX,%EBX |
0x23efa9 MOV (%R10,%RAX,8),%RAX |
0x23efad LEA (%R8,%RDX,8),%R11 |
0x23efb1 SUB %RDX,%R15 |
0x23efb4 XOR %EDX,%EDX |
0x23efb6 VMOVSD 0x8(%RAX),%XMM0 |
0x23efbb JMP 23efc8 |
(326) 0x23efc0 INC %RDX |
(326) 0x23efc3 CMP %RDX,%R14 |
(326) 0x23efc6 JE 23eff0 |
(326) 0x23efc8 CMP %RDX,%R15 |
(326) 0x23efcb JE 23efc0 |
(326) 0x23efcd VMOVSD (%R11,%RDX,8),%XMM1 |
(326) 0x23efd3 VUCOMISD %XMM0,%XMM1 |
(326) 0x23efd7 JAE 23efc0 |
(326) 0x23efd9 MOVSXD %EBX,%RCX |
(326) 0x23efdc INC %EBX |
(326) 0x23efde VMOVSD %XMM1,(%RSI,%RCX,8) |
(326) 0x23efe3 JMP 23efc0 |
0x23eff0 MOV -0x2c(%RBP),%ECX |
0x23eff3 MOV -0x38(%RBP),%R14 |
0x23eff7 VXORPD %XMM0,%XMM0,%XMM0 |
0x23effb TEST %EBX,%EBX |
0x23effd JLE 23ef70 |
0x23f003 VMOVSD 0x238(%RAX),%XMM15 |
0x23f00b VMOVUPD 0x18(%RAX),%XMM2 |
0x23f010 VMOVSD 0x28(%RAX),%XMM16 |
0x23f017 VMOVSD 0x30(%RAX),%XMM20 |
0x23f01e VMOVSD 0x48(%RAX),%XMM3 |
0x23f023 VMOVSD 0x68(%RAX),%XMM4 |
0x23f028 VMOVSD 0x38(%RAX),%XMM5 |
0x23f02d VMOVSD 0x40(%RAX),%XMM6 |
0x23f032 VMOVSD 0x50(%RAX),%XMM9 |
0x23f037 VMOVSD 0x58(%RAX),%XMM7 |
0x23f03c VMOVSD 0x60(%RAX),%XMM10 |
0x23f041 VMOVSD 0x70(%RAX),%XMM11 |
0x23f046 VMOVSD 0x78(%RAX),%XMM30 |
0x23f04d VMOVSD 0x80(%RAX),%XMM31 |
0x23f054 VMOVSD 0x88(%RAX),%XMM17 |
0x23f05b VMOVSD 0x90(%RAX),%XMM18 |
0x23f062 MOV 0x218(%RAX),%R11 |
0x23f069 MOV %EBX,%R15D |
0x23f06c CMP $0x8,%EBX |
0x23f06f JAE 23f078 |
0x23f071 XOR %EBX,%EBX |
0x23f073 JMP 23f59c |
0x23f078 VMOVAPD %XMM17,%XMM0 |
0x23f07e VMOVAPD %XMM18,%XMM1 |
0x23f084 MOV %R15,%R14 |
0x23f087 SHR $0x3,%R14 |
0x23f08b VMOVAPD %XMM30,-0x160(%RBP) |
0x23f092 VMOVAPD %XMM31,-0x150(%RBP) |
0x23f099 VBROADCASTSD %XMM15,%YMM17 |
0x23f09f VBROADCASTSD %XMM2,%YMM18 |
0x23f0a5 VPERMPD $0x55,%YMM2,%YMM19 |
0x23f0ac VBROADCASTSD %XMM16,%YMM8 |
0x23f0b2 VBROADCASTSD %XMM20,%YMM21 |
0x23f0b8 VBROADCASTSD %XMM5,%YMM22 |
0x23f0be VBROADCASTSD %XMM6,%YMM23 |
0x23f0c4 VBROADCASTSD %XMM3,%YMM24 |
0x23f0ca VMOVAPD %XMM9,-0xd0(%RBP) |
0x23f0d2 VBROADCASTSD %XMM9,%YMM25 |
0x23f0d8 VBROADCASTSD %XMM7,%YMM26 |
0x23f0de VMOVAPD %XMM10,-0xc0(%RBP) |
0x23f0e6 VBROADCASTSD %XMM10,%YMM27 |
0x23f0ec VBROADCASTSD %XMM4,%YMM28 |
0x23f0f2 VBROADCASTSD %XMM11,%YMM29 |
0x23f0f8 VBROADCASTSD %XMM30,%YMM30 |
0x23f0fe VBROADCASTSD %XMM31,%YMM31 |
0x23f104 VBROADCASTSD %XMM0,%YMM9 |
0x23f109 VBROADCASTSD %XMM1,%YMM10 |
0x23f10e MOV %R15D,%EBX |
0x23f111 VMOVAPD %XMM3,-0x110(%RBP) |
0x23f119 VMOVAPD %XMM4,-0x100(%RBP) |
0x23f121 AND $-0x8,%EBX |
0x23f124 VXORPD %XMM4,%XMM4,%XMM4 |
0x23f128 VXORPD %XMM3,%XMM3,%XMM3 |
0x23f12c VMOVSD %XMM14,-0x98(%RBP) |
0x23f134 VMOVAPD %XMM15,-0x190(%RBP) |
0x23f13c VMOVAPD %XMM16,-0x180(%RBP) |
0x23f143 VMOVAPD %XMM20,-0x170(%RBP) |
0x23f14a VMOVAPD %XMM5,-0x120(%RBP) |
0x23f152 VMOVAPD %XMM6,-0xf0(%RBP) |
0x23f15a VMOVAPD %XMM7,-0xe0(%RBP) |
0x23f162 VMOVAPD %XMM11,-0xb0(%RBP) |
0x23f16a VMOVAPD %XMM0,-0x140(%RBP) |
0x23f172 VMOVAPD %XMM1,-0x130(%RBP) |
0x23f17a XOR %EAX,%EAX |
0x23f17c SAL $0x6,%R14 |
(324) 0x23f180 VMULPD (%RSI,%RAX,1),%YMM17,%YMM11 |
(324) 0x23f187 VMULPD 0x20(%RSI,%RAX,1),%YMM17,%YMM13 |
(324) 0x23f18f VMOVAPD %YMM8,%YMM20 |
(324) 0x23f195 ADD $0x40,%RAX |
(324) 0x23f199 VCVTTPD2DQ %YMM11,%XMM0 |
(324) 0x23f19e VCVTTPD2DQ %YMM13,%XMM1 |
(324) 0x23f1a3 VROUNDPD $0xb,%YMM11,%YMM12 |
(324) 0x23f1a9 VROUNDPD $0xb,%YMM13,%YMM14 |
(324) 0x23f1af VPMOVSXDQ %XMM0,%YMM5 |
(324) 0x23f1b4 VPMOVSXDQ %XMM1,%YMM6 |
(324) 0x23f1b9 VSUBPD %YMM12,%YMM11,%YMM12 |
(324) 0x23f1be VSUBPD %YMM14,%YMM13,%YMM11 |
(324) 0x23f1c3 VPEXTRQ $0x1,%XMM5,%RDX |
(324) 0x23f1c9 VMOVQ %XMM5,%RDI |
(324) 0x23f1ce VEXTRACTI128 $0x1,%YMM5,%XMM5 |
(324) 0x23f1d4 VMOVQ %XMM6,%R8 |
(324) 0x23f1d9 VPEXTRQ $0x1,%XMM6,%RCX |
(324) 0x23f1df VMOVQ %XMM5,%R12 |
(324) 0x23f1e4 VPEXTRQ $0x1,%XMM5,%R13 |
(324) 0x23f1ea VEXTRACTI128 $0x1,%YMM6,%XMM5 |
(324) 0x23f1f0 VMOVSD (%R11,%RDI,8),%XMM6 |
(324) 0x23f1f6 VMOVSD (%R11,%R8,8),%XMM7 |
(324) 0x23f1fc VMULPD %YMM12,%YMM12,%YMM16 |
(324) 0x23f202 VFMADD213PD %YMM21,%YMM11,%YMM20 |
(324) 0x23f208 VMULPD %YMM11,%YMM11,%YMM15 |
(324) 0x23f20d VMOVQ %XMM5,%R9 |
(324) 0x23f212 VPEXTRQ $0x1,%XMM5,%R10 |
(324) 0x23f218 VMOVSD (%R11,%R12,8),%XMM5 |
(324) 0x23f21e VMOVHPD (%R11,%RDX,8),%XMM6,%XMM6 |
(324) 0x23f224 VMOVHPD (%R11,%RCX,8),%XMM7,%XMM7 |
(324) 0x23f22a VMOVHPD (%R11,%R13,8),%XMM5,%XMM5 |
(324) 0x23f230 VMULPD %YMM12,%YMM16,%YMM14 |
(324) 0x23f236 VMULPD %YMM11,%YMM15,%YMM13 |
(324) 0x23f23b VFMADD231PD %YMM19,%YMM15,%YMM20 |
(324) 0x23f241 VFMADD231PD %YMM18,%YMM13,%YMM20 |
(324) 0x23f247 VINSERTF128 $0x1,%XMM5,%YMM6,%YMM5 |
(324) 0x23f24d VMOVSD (%R11,%R9,8),%XMM6 |
(324) 0x23f253 VMOVHPD (%R11,%R10,8),%XMM6,%XMM6 |
(324) 0x23f259 VINSERTF128 $0x1,%XMM6,%YMM7,%YMM6 |
(324) 0x23f25f VMOVAPD %YMM8,%YMM7 |
(324) 0x23f263 VFMADD213PD %YMM21,%YMM12,%YMM7 |
(324) 0x23f269 VFMADD213PD %YMM3,%YMM6,%YMM20 |
(324) 0x23f26f VMOVAPD %YMM24,%YMM6 |
(324) 0x23f275 VFMADD213PD %YMM25,%YMM11,%YMM6 |
(324) 0x23f27b VFMADD231PD %YMM19,%YMM16,%YMM7 |
(324) 0x23f281 VFMADD231PD %YMM23,%YMM15,%YMM6 |
(324) 0x23f287 VFMADD231PD %YMM18,%YMM14,%YMM7 |
(324) 0x23f28d VFMADD231PD %YMM22,%YMM13,%YMM6 |
(324) 0x23f293 VFMADD213PD %YMM4,%YMM5,%YMM7 |
(324) 0x23f298 VPCMPEQD %XMM4,%XMM4,%XMM4 |
(324) 0x23f29c VPSUBD %XMM4,%XMM0,%XMM3 |
(324) 0x23f2a0 VPSUBD %XMM4,%XMM1,%XMM4 |
(324) 0x23f2a4 VPMOVSXDQ %XMM3,%YMM3 |
(324) 0x23f2a9 VPMOVSXDQ %XMM4,%YMM4 |
(324) 0x23f2ae VMOVQ %XMM3,%RCX |
(324) 0x23f2b3 VPEXTRQ $0x1,%XMM3,%RDX |
(324) 0x23f2b9 VEXTRACTI128 $0x1,%YMM3,%XMM3 |
(324) 0x23f2bf VMOVQ %XMM4,%R9 |
(324) 0x23f2c4 VPEXTRQ $0x1,%XMM4,%R10 |
(324) 0x23f2ca VPEXTRQ $0x1,%XMM3,%RDI |
(324) 0x23f2d0 VMOVQ %XMM3,%R8 |
(324) 0x23f2d5 VEXTRACTI128 $0x1,%YMM4,%XMM3 |
(324) 0x23f2db VMOVSD (%R11,%RCX,8),%XMM4 |
(324) 0x23f2e1 VMOVSD (%R11,%R9,8),%XMM5 |
(324) 0x23f2e7 VMOVQ %XMM3,%R12 |
(324) 0x23f2ec VPEXTRQ $0x1,%XMM3,%R13 |
(324) 0x23f2f2 VMOVSD (%R11,%R8,8),%XMM3 |
(324) 0x23f2f8 VMOVHPD (%R11,%RDX,8),%XMM4,%XMM4 |
(324) 0x23f2fe VMOVHPD (%R11,%R10,8),%XMM5,%XMM5 |
(324) 0x23f304 VMOVHPD (%R11,%RDI,8),%XMM3,%XMM3 |
(324) 0x23f30a VINSERTF128 $0x1,%XMM3,%YMM4,%YMM3 |
(324) 0x23f310 VMOVSD (%R11,%R12,8),%XMM4 |
(324) 0x23f316 VMOVHPD (%R11,%R13,8),%XMM4,%XMM4 |
(324) 0x23f31c VINSERTF128 $0x1,%XMM4,%YMM5,%YMM4 |
(324) 0x23f322 VMOVAPD %YMM24,%YMM5 |
(324) 0x23f328 VFMADD213PD %YMM25,%YMM12,%YMM5 |
(324) 0x23f32e VFMADD213PD %YMM20,%YMM4,%YMM6 |
(324) 0x23f334 VMOVAPD %YMM28,%YMM20 |
(324) 0x23f33a VFMADD213PD %YMM29,%YMM11,%YMM20 |
(324) 0x23f340 VFMADD213PD %YMM10,%YMM9,%YMM11 |
(324) 0x23f345 VFMADD231PD %YMM23,%YMM16,%YMM5 |
(324) 0x23f34b VFMADD231PD %YMM27,%YMM15,%YMM20 |
(324) 0x23f351 VFMADD231PD %YMM15,%YMM31,%YMM11 |
(324) 0x23f357 VFMADD231PD %YMM22,%YMM14,%YMM5 |
(324) 0x23f35d VFMADD231PD %YMM26,%YMM13,%YMM20 |
(324) 0x23f363 VFMADD231PD %YMM13,%YMM30,%YMM11 |
(324) 0x23f369 VFMADD213PD %YMM7,%YMM3,%YMM5 |
(324) 0x23f36e VPBROADCASTD -0x3b297(%RIP),%XMM3 |
(324) 0x23f377 VPADDD %XMM3,%XMM0,%XMM4 |
(324) 0x23f37b VPADDD %XMM3,%XMM1,%XMM3 |
(324) 0x23f37f VPMOVSXDQ %XMM4,%YMM4 |
(324) 0x23f384 VPMOVSXDQ %XMM3,%YMM3 |
(324) 0x23f389 VMOVQ %XMM4,%RCX |
(324) 0x23f38e VPEXTRQ $0x1,%XMM4,%RDX |
(324) 0x23f394 VEXTRACTI128 $0x1,%YMM4,%XMM4 |
(324) 0x23f39a VMOVQ %XMM3,%R10 |
(324) 0x23f39f VPEXTRQ $0x1,%XMM3,%R9 |
(324) 0x23f3a5 VEXTRACTI128 $0x1,%YMM3,%XMM3 |
(324) 0x23f3ab VMOVQ %XMM4,%RDI |
(324) 0x23f3b0 VPEXTRQ $0x1,%XMM4,%R8 |
(324) 0x23f3b6 VMOVSD (%R11,%RCX,8),%XMM4 |
(324) 0x23f3bc VMOVQ %XMM3,%R12 |
(324) 0x23f3c1 VPEXTRQ $0x1,%XMM3,%R13 |
(324) 0x23f3c7 VMOVSD (%R11,%R10,8),%XMM7 |
(324) 0x23f3cd VMOVSD (%R11,%RDI,8),%XMM3 |
(324) 0x23f3d3 VMOVHPD (%R11,%RDX,8),%XMM4,%XMM4 |
(324) 0x23f3d9 VMOVHPD (%R11,%R9,8),%XMM7,%XMM7 |
(324) 0x23f3df VMOVHPD (%R11,%R8,8),%XMM3,%XMM3 |
(324) 0x23f3e5 VINSERTF128 $0x1,%XMM3,%YMM4,%YMM3 |
(324) 0x23f3eb VMOVSD (%R11,%R12,8),%XMM4 |
(324) 0x23f3f1 VMOVHPD (%R11,%R13,8),%XMM4,%XMM4 |
(324) 0x23f3f7 VINSERTF128 $0x1,%XMM4,%YMM7,%YMM4 |
(324) 0x23f3fd VMOVAPD %YMM28,%YMM7 |
(324) 0x23f403 VFMADD213PD %YMM29,%YMM12,%YMM7 |
(324) 0x23f409 VFMADD213PD %YMM10,%YMM9,%YMM12 |
(324) 0x23f40e VFMADD213PD %YMM6,%YMM4,%YMM20 |
(324) 0x23f414 VFMADD231PD %YMM27,%YMM16,%YMM7 |
(324) 0x23f41a VFMADD231PD %YMM16,%YMM31,%YMM12 |
(324) 0x23f420 VFMADD231PD %YMM26,%YMM14,%YMM7 |
(324) 0x23f426 VFMADD231PD %YMM14,%YMM30,%YMM12 |
(324) 0x23f42c VFMADD213PD %YMM5,%YMM3,%YMM7 |
(324) 0x23f431 VPBROADCASTD -0x3b372(%RIP),%XMM3 |
(324) 0x23f43a VMOVAPD %YMM12,%YMM4 |
(324) 0x23f43e VPADDD %XMM3,%XMM0,%XMM0 |
(324) 0x23f442 VPADDD %XMM3,%XMM1,%XMM1 |
(324) 0x23f446 VPMOVSXDQ %XMM0,%YMM0 |
(324) 0x23f44b VPMOVSXDQ %XMM1,%YMM1 |
(324) 0x23f450 VMOVQ %XMM0,%RCX |
(324) 0x23f455 VPEXTRQ $0x1,%XMM0,%RDX |
(324) 0x23f45b VEXTRACTI128 $0x1,%YMM0,%XMM0 |
(324) 0x23f461 VMOVQ %XMM1,%R9 |
(324) 0x23f466 VPEXTRQ $0x1,%XMM1,%R10 |
(324) 0x23f46c VPEXTRQ $0x1,%XMM0,%RDI |
(324) 0x23f472 VMOVQ %XMM0,%R8 |
(324) 0x23f477 VEXTRACTI128 $0x1,%YMM1,%XMM0 |
(324) 0x23f47d VMOVSD (%R11,%RCX,8),%XMM1 |
(324) 0x23f483 VMOVSD (%R11,%R9,8),%XMM3 |
(324) 0x23f489 VMOVQ %XMM0,%R12 |
(324) 0x23f48e VPEXTRQ $0x1,%XMM0,%R13 |
(324) 0x23f494 VMOVSD (%R11,%R8,8),%XMM0 |
(324) 0x23f49a VMOVHPD (%R11,%RDX,8),%XMM1,%XMM1 |
(324) 0x23f4a0 VMOVHPD (%R11,%R10,8),%XMM3,%XMM3 |
(324) 0x23f4a6 VMOVHPD (%R11,%RDI,8),%XMM0,%XMM0 |
(324) 0x23f4ac VINSERTF128 $0x1,%XMM0,%YMM1,%YMM0 |
(324) 0x23f4b2 VMOVSD (%R11,%R12,8),%XMM1 |
(324) 0x23f4b8 VMOVHPD (%R11,%R13,8),%XMM1,%XMM1 |
(324) 0x23f4be VFMADD213PD %YMM7,%YMM0,%YMM4 |
(324) 0x23f4c3 VINSERTF128 $0x1,%XMM1,%YMM3,%YMM1 |
(324) 0x23f4c9 VMOVAPD %YMM11,%YMM3 |
(324) 0x23f4cd VFMADD213PD %YMM20,%YMM1,%YMM3 |
(324) 0x23f4d3 CMP %RAX,%R14 |
(324) 0x23f4d6 JNE 23f180 |
0x23f4dc VADDPD %YMM4,%YMM3,%YMM0 |
0x23f4e0 VMOVSD -0x98(%RBP),%XMM14 |
0x23f4e8 VMOVAPD -0x190(%RBP),%XMM15 |
0x23f4f0 VMOVAPD -0x180(%RBP),%XMM16 |
0x23f4f7 VMOVAPD -0x170(%RBP),%XMM20 |
0x23f4fe VMOVAPD -0x160(%RBP),%XMM30 |
0x23f505 VMOVAPD -0x150(%RBP),%XMM31 |
0x23f50c VMOVAPD -0x140(%RBP),%XMM17 |
0x23f513 VMOVAPD -0x130(%RBP),%XMM18 |
0x23f51a VMOVAPD -0x120(%RBP),%XMM5 |
0x23f522 VMOVAPD -0x110(%RBP),%XMM3 |
0x23f52a VMOVAPD -0x100(%RBP),%XMM4 |
0x23f532 VMOVAPD -0xf0(%RBP),%XMM6 |
0x23f53a VMOVAPD -0xe0(%RBP),%XMM7 |
0x23f542 VMOVAPD -0xd0(%RBP),%XMM9 |
0x23f54a VMOVAPD -0xc0(%RBP),%XMM10 |
0x23f552 VMOVAPD -0xb0(%RBP),%XMM11 |
0x23f55a MOV -0x90(%RBP),%R12 |
0x23f561 MOV -0x88(%RBP),%RDI |
0x23f568 MOV -0x80(%RBP),%R8 |
0x23f56c MOV -0x78(%RBP),%R9 |
0x23f570 MOV -0x70(%RBP),%R10 |
0x23f574 MOV -0x68(%RBP),%R13 |
0x23f578 MOV -0x2c(%RBP),%ECX |
0x23f57b MOV -0x38(%RBP),%R14 |
0x23f57f VEXTRACTF128 $0x1,%YMM0,%XMM1 |
0x23f585 VADDPD %XMM1,%XMM0,%XMM0 |
0x23f589 VPERMILPD $0x1,%XMM0,%XMM1 |
0x23f58f VADDSD %XMM1,%XMM0,%XMM0 |
0x23f593 CMP %R15,%RBX |
0x23f596 JE 23ef70 |
0x23f59c VUNPCKLPD %XMM4,%XMM3,%XMM1 |
0x23f5a0 VUNPCKLPD %XMM11,%XMM9,%XMM3 |
0x23f5a5 VUNPCKLPD %XMM10,%XMM6,%XMM4 |
0x23f5aa VUNPCKLPD %XMM7,%XMM5,%XMM9 |
0x23f5ae XCHG %AX,%AX |
(325) 0x23f5b0 VMULSD (%RSI,%RBX,8),%XMM15,%XMM5 |
(325) 0x23f5b5 VMOVAPD %XMM16,%XMM12 |
(325) 0x23f5bb INC %RBX |
(325) 0x23f5be VROUNDSD $0xb,%XMM5,%XMM5,%XMM6 |
(325) 0x23f5c4 VCVTTSD2SI %XMM5,%EAX |
(325) 0x23f5c8 CLTQ |
(325) 0x23f5ca VSUBSD %XMM6,%XMM5,%XMM5 |
(325) 0x23f5ce VMULSD %XMM5,%XMM5,%XMM6 |
(325) 0x23f5d2 VFMADD213SD %XMM20,%XMM5,%XMM12 |
(325) 0x23f5d8 VMULSD %XMM5,%XMM6,%XMM7 |
(325) 0x23f5dc VMOVDDUP %XMM6,%XMM10 |
(325) 0x23f5e0 VMOVDDUP %XMM7,%XMM8 |
(325) 0x23f5e4 VUNPCKLPD %XMM10,%XMM8,%XMM11 |
(325) 0x23f5e9 VMULPD %XMM2,%XMM11,%XMM11 |
(325) 0x23f5ed VPERMILPD $0x1,%XMM11,%XMM13 |
(325) 0x23f5f3 VADDSD %XMM11,%XMM12,%XMM11 |
(325) 0x23f5f8 VADDSD %XMM13,%XMM11,%XMM11 |
(325) 0x23f5fd VMOVDDUP %XMM5,%XMM13 |
(325) 0x23f601 VFMADD213SD %XMM18,%XMM17,%XMM5 |
(325) 0x23f607 VFMADD132SD (%R11,%RAX,8),%XMM0,%XMM11 |
(325) 0x23f60d VFMADD213PD %XMM3,%XMM1,%XMM13 |
(325) 0x23f612 VFMADD231SD %XMM6,%XMM31,%XMM5 |
(325) 0x23f618 VFMADD231PD %XMM10,%XMM4,%XMM13 |
(325) 0x23f61d VFMADD231SD %XMM7,%XMM30,%XMM5 |
(325) 0x23f623 VFMADD231PD %XMM8,%XMM9,%XMM13 |
(325) 0x23f628 VMULPD 0x8(%R11,%RAX,8),%XMM13,%XMM8 |
(325) 0x23f62f VPERMILPD $0x1,%XMM8,%XMM0 |
(325) 0x23f635 VADDSD %XMM0,%XMM8,%XMM0 |
(325) 0x23f639 VADDSD %XMM0,%XMM11,%XMM0 |
(325) 0x23f63d VFMADD231SD 0x18(%R11,%RAX,8),%XMM5,%XMM0 |
(325) 0x23f644 CMP %RBX,%R15 |
(325) 0x23f647 JNE 23f5b0 |
0x23f64d JMP 23ef70 |
/beegfs/hackathon/users/eoseret/qaas_runs/170-855-3059/intel/miniqmc/build/miniqmc/src/Particle/ParticleSet.h: 313 - 316 |
-------------------------------------------------------------------------------- |
313: inline int first(int igroup) const { return (*group_offsets_)[igroup]; } |
314: |
315: ///return the last index of a group i |
316: inline int last(int igroup) const { return (*group_offsets_)[igroup + 1]; } |
/usr/lib/gcc/x86_64-redhat-linux/11/../../../../include/c++/11/bits/stl_vector.h: 1064 - 1064 |
-------------------------------------------------------------------------------- |
1064: return *(this->_M_impl._M_start + __n); |
/beegfs/hackathon/users/eoseret/qaas_runs/170-855-3059/intel/miniqmc/build/miniqmc/src/QMCWaveFunctions/Jastrow/TwoBodyJastrowRef.h: 127 - 132 |
-------------------------------------------------------------------------------- |
127: for (int jg = 0; jg < NumGroups; ++jg) |
128: { |
129: const FuncType& f2(*F[igt + jg]); |
130: int iStart = P.first(jg); |
131: int iEnd = P.last(jg); |
132: curUat += f2.evaluateV(iat, iStart, iEnd, dist, DistCompressed.data()); |
/beegfs/hackathon/users/eoseret/qaas_runs/170-855-3059/intel/miniqmc/build/miniqmc/src/QMCWaveFunctions/Jastrow/BsplineFunctor.h: 236 - 260 |
-------------------------------------------------------------------------------- |
236: for (int jat = 0; jat < iLimit; jat++) |
237: { |
238: real_type r = distArray[jat]; |
239: // pick the distances smaller than the cutoff and avoid the reference atom |
240: if (r < cutoff_radius && iStart + jat != iat) |
241: distArrayCompressed[iCount++] = distArray[jat]; |
242: } |
243: |
244: real_type d = 0.0; |
245: //#pragma omp simd reduction(+:d) |
246: for (int jat = 0; jat < iCount; jat++) |
247: { |
248: real_type r = distArrayCompressed[jat]; |
249: r *= DeltaRInv; |
250: int i = (int)r; |
251: real_type t = r - real_type(i); |
252: real_type tp0 = t * t * t; |
253: real_type tp1 = t * t; |
254: real_type tp2 = t; |
255: |
256: real_type d1 = SplineCoefs[i + 0] * (A[0] * tp0 + A[1] * tp1 + A[2] * tp2 + A[3]); |
257: real_type d2 = SplineCoefs[i + 1] * (A[4] * tp0 + A[5] * tp1 + A[6] * tp2 + A[7]); |
258: real_type d3 = SplineCoefs[i + 2] * (A[8] * tp0 + A[9] * tp1 + A[10] * tp2 + A[11]); |
259: real_type d4 = SplineCoefs[i + 3] * (A[12] * tp0 + A[13] * tp1 + A[14] * tp2 + A[15]); |
260: d += (d1 + d2 + d3 + d4); |
Path / |
Metric | Value |
---|---|
CQA speedup if no scalar integer | 1.33 |
CQA speedup if FP arith vectorized | 1.07 |
CQA speedup if fully vectorized | 3.39 |
CQA speedup if no inter-iteration dependency | NA |
CQA speedup if next bottleneck killed | 1.02 |
Bottlenecks | micro-operation queue, |
Function | _ZN16miniqmcreference17TwoBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE14evaluateRatiosERNS1_18VirtualParticleSetERSt6vectorIdSaIdEE |
Source | ParticleSet.h:313-313,ParticleSet.h:316-316,TwoBodyJastrowRef.h:127-127,TwoBodyJastrowRef.h:132-132,BsplineFunctor.h:236-236,BsplineFunctor.h:241-241,BsplineFunctor.h:246-246,BsplineFunctor.h:258-258 |
Source loop unroll info | NA |
Source loop unroll confidence level | NA |
Unroll/vectorization loop type | NA |
Unroll factor | NA |
CQA cycles | 21.67 |
CQA cycles if no scalar integer | 16.33 |
CQA cycles if FP arith vectorized | 20.33 |
CQA cycles if fully vectorized | 6.40 |
Front-end cycles | 21.67 |
DIV/SQRT cycles | 4.25 |
P0 cycles | 4.25 |
P1 cycles | 4.25 |
P2 cycles | 4.25 |
P3 cycles | 4.00 |
P4 cycles | 21.33 |
P5 cycles | 21.33 |
P6 cycles | 21.33 |
P7 cycles | 1.00 |
P8 cycles | 9.00 |
P9 cycles | 8.50 |
P10 cycles | 8.50 |
P11 cycles | 8.00 |
P12 cycles | 8.00 |
P13 cycles | 0.00 |
Inter-iter dependencies cycles | NA |
FE+BE cycles (UFS) | NA |
Stall cycles (UFS) | NA |
Nb insns | 131.00 |
Nb uops | 130.00 |
Nb loads | 46.00 |
Nb stores | 18.00 |
Nb stack references | 24.00 |
FLOP/cycle | 0.37 |
Nb FLOP add-sub | 8.00 |
Nb FLOP mul | 0.00 |
Nb FLOP fma | 0.00 |
Nb FLOP div | 0.00 |
Nb FLOP rcp | 0.00 |
Nb FLOP sqrt | 0.00 |
Nb FLOP rsqrt | 0.00 |
Bytes/cycle | 34.34 |
Bytes prefetched | 0.00 |
Bytes loaded | 484.00 |
Bytes stored | 260.00 |
Stride 0 | NA |
Stride 1 | NA |
Stride n | NA |
Stride unknown | NA |
Stride indirect | NA |
Vectorization ratio all | 41.58 |
Vectorization ratio load | 38.10 |
Vectorization ratio store | 83.33 |
Vectorization ratio mul | NA |
Vectorization ratio add_sub | 40.00 |
Vectorization ratio fma | NA |
Vectorization ratio div_sqrt | NA |
Vectorization ratio other | 25.00 |
Vector-efficiency ratio all | 17.76 |
Vector-efficiency ratio load | 16.82 |
Vector-efficiency ratio store | 22.57 |
Vector-efficiency ratio mul | NA |
Vector-efficiency ratio add_sub | 22.50 |
Vector-efficiency ratio fma | NA |
Vector-efficiency ratio div_sqrt | NA |
Vector-efficiency ratio other | 15.80 |
Metric | Value |
---|---|
CQA speedup if no scalar integer | 1.33 |
CQA speedup if FP arith vectorized | 1.07 |
CQA speedup if fully vectorized | 3.39 |
CQA speedup if no inter-iteration dependency | NA |
CQA speedup if next bottleneck killed | 1.02 |
Bottlenecks | micro-operation queue, |
Function | _ZN16miniqmcreference17TwoBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE14evaluateRatiosERNS1_18VirtualParticleSetERSt6vectorIdSaIdEE |
Source | ParticleSet.h:313-313,ParticleSet.h:316-316,TwoBodyJastrowRef.h:127-127,TwoBodyJastrowRef.h:132-132,BsplineFunctor.h:236-236,BsplineFunctor.h:241-241,BsplineFunctor.h:246-246,BsplineFunctor.h:258-258 |
Source loop unroll info | NA |
Source loop unroll confidence level | NA |
Unroll/vectorization loop type | NA |
Unroll factor | NA |
CQA cycles | 21.67 |
CQA cycles if no scalar integer | 16.33 |
CQA cycles if FP arith vectorized | 20.33 |
CQA cycles if fully vectorized | 6.40 |
Front-end cycles | 21.67 |
DIV/SQRT cycles | 4.25 |
P0 cycles | 4.25 |
P1 cycles | 4.25 |
P2 cycles | 4.25 |
P3 cycles | 4.00 |
P4 cycles | 21.33 |
P5 cycles | 21.33 |
P6 cycles | 21.33 |
P7 cycles | 1.00 |
P8 cycles | 9.00 |
P9 cycles | 8.50 |
P10 cycles | 8.50 |
P11 cycles | 8.00 |
P12 cycles | 8.00 |
P13 cycles | 0.00 |
Inter-iter dependencies cycles | NA |
FE+BE cycles (UFS) | NA |
Stall cycles (UFS) | NA |
Nb insns | 131.00 |
Nb uops | 130.00 |
Nb loads | 46.00 |
Nb stores | 18.00 |
Nb stack references | 24.00 |
FLOP/cycle | 0.37 |
Nb FLOP add-sub | 8.00 |
Nb FLOP mul | 0.00 |
Nb FLOP fma | 0.00 |
Nb FLOP div | 0.00 |
Nb FLOP rcp | 0.00 |
Nb FLOP sqrt | 0.00 |
Nb FLOP rsqrt | 0.00 |
Bytes/cycle | 34.34 |
Bytes prefetched | 0.00 |
Bytes loaded | 484.00 |
Bytes stored | 260.00 |
Stride 0 | NA |
Stride 1 | NA |
Stride n | NA |
Stride unknown | NA |
Stride indirect | NA |
Vectorization ratio all | 41.58 |
Vectorization ratio load | 38.10 |
Vectorization ratio store | 83.33 |
Vectorization ratio mul | NA |
Vectorization ratio add_sub | 40.00 |
Vectorization ratio fma | NA |
Vectorization ratio div_sqrt | NA |
Vectorization ratio other | 25.00 |
Vector-efficiency ratio all | 17.76 |
Vector-efficiency ratio load | 16.82 |
Vector-efficiency ratio store | 22.57 |
Vector-efficiency ratio mul | NA |
Vector-efficiency ratio add_sub | 22.50 |
Vector-efficiency ratio fma | NA |
Vector-efficiency ratio div_sqrt | NA |
Vector-efficiency ratio other | 15.80 |
Path / |
Function | _ZN16miniqmcreference17TwoBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE14evaluateRatiosERNS1_18VirtualParticleSetERSt6vectorIdSaIdEE |
Source file and lines | BsplineFunctor.h:236-260 |
Module | exec |
nb instructions | 131 |
nb uops | 130 |
loop length | 694 |
used x86 registers | 14 |
used mmx registers | 0 |
used xmm registers | 19 |
used ymm registers | 21 |
used zmm registers | 0 |
nb stack references | 24 |
micro-operation queue | 21.67 cycles |
front end | 21.67 cycles |
Metric | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 4.25 | 4.25 | 4.25 | 4.25 | 4.00 | 21.33 | 21.33 | 21.33 | 1.00 | 9.00 | 8.50 | 8.50 | 8.00 | 8.00 |
cycles | 4.25 | 4.25 | 4.25 | 4.25 | 4.00 | 21.33 | 21.33 | 21.33 | 1.00 | 9.00 | 8.50 | 8.50 | 8.00 | 8.00 |
Cycles executing div or sqrt instructions | NA |
Front-end | 21.67 |
Dispatch | 21.33 |
Overall L1 | 21.67 |
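Vectorization ratios, INT (integer instructions; heading inferred, not present in the extracted report) |
all | 0% |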
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 0% |
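Vectorization ratios, FP (floating-point instructions; heading inferred, not present in the extracted report) |
all | 51% |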
load | 48% |
store | 93% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 50% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 31% |
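Vectorization ratios, INT+FP (same values as the "Vectorization ratio" metrics above, truncated to whole percents) |
all | 41% |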
load | 38% |
store | 83% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 40% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 25% |
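Vector-efficiency ratios, INT (integer instructions; heading inferred, not present in the extracted report) |
all | 10% |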
load | 10% |
store | 9% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 9% |
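Vector-efficiency ratios, FP (floating-point instructions; heading inferred, not present in the extracted report) |
all | 19% |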
load | 18% |
store | 24% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 25% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 17% |
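Vector-efficiency ratios, INT+FP (same values as the "Vector-efficiency ratio" metrics above, truncated to whole percents) |
all | 17% |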
load | 16% |
store | 22% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 22% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 15% |
Instruction | Nb FU | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
VADDSD %XMM0,%XMM14,%XMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 |
CMP %RDI,%R14 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JE 23ee75 <_ZN16miniqmcreference17TwoBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE14evaluateRatiosERNS1_18VirtualParticleSetERSt6vectorIdSaIdEE+0x45> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV %R14,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %ECX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x4(%R13,%RAX,4),%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
INC %R14 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
VXORPD %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %ECX,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SUB %EDX,%EBX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JLE 23ef70 <_ZN16miniqmcreference17TwoBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE14evaluateRatiosERNS1_18VirtualParticleSetERSt6vectorIdSaIdEE+0x140> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
ADD %R9,%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOVSXD %EDX,%RDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %R12,%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R14,-0x38(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %EBX,%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %ECX,-0x2c(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
XOR %EBX,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV (%R10,%RAX,8),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
LEA (%R8,%RDX,8),%R11 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SUB %RDX,%R15 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMOVSD 0x8(%RAX),%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
JMP 23efc8 <_ZN16miniqmcreference17TwoBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE14evaluateRatiosERNS1_18VirtualParticleSetERSt6vectorIdSaIdEE+0x198> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
MOV -0x2c(%RBP),%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV -0x38(%RBP),%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
VXORPD %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
TEST %EBX,%EBX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JLE 23ef70 <_ZN16miniqmcreference17TwoBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE14evaluateRatiosERNS1_18VirtualParticleSetERSt6vectorIdSaIdEE+0x140> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
VMOVSD 0x238(%RAX),%XMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVUPD 0x18(%RAX),%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVSD 0x28(%RAX),%XMM16 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVSD 0x30(%RAX),%XMM20 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVSD 0x48(%RAX),%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVSD 0x68(%RAX),%XMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVSD 0x38(%RAX),%XMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVSD 0x40(%RAX),%XMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVSD 0x50(%RAX),%XMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVSD 0x58(%RAX),%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVSD 0x60(%RAX),%XMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVSD 0x70(%RAX),%XMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVSD 0x78(%RAX),%XMM30 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVSD 0x80(%RAX),%XMM31 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVSD 0x88(%RAX),%XMM17 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVSD 0x90(%RAX),%XMM18 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV 0x218(%RAX),%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %EBX,%R15D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP $0x8,%EBX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JAE 23f078 <_ZN16miniqmcreference17TwoBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE14evaluateRatiosERNS1_18VirtualParticleSetERSt6vectorIdSaIdEE+0x248> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
XOR %EBX,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
JMP 23f59c <_ZN16miniqmcreference17TwoBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE14evaluateRatiosERNS1_18VirtualParticleSetERSt6vectorIdSaIdEE+0x76c> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
VMOVAPD %XMM17,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVAPD %XMM18,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R15,%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SHR $0x3,%R14 | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVAPD %XMM30,-0x160(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 |
VMOVAPD %XMM31,-0x150(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 |
VBROADCASTSD %XMM15,%YMM17 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 |
VBROADCASTSD %XMM2,%YMM18 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 |
VPERMPD $0x55,%YMM2,%YMM19 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 4 | 0.50 |
VBROADCASTSD %XMM16,%YMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 |
VBROADCASTSD %XMM20,%YMM21 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 |
VBROADCASTSD %XMM5,%YMM22 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 |
VBROADCASTSD %XMM6,%YMM23 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 |
VBROADCASTSD %XMM3,%YMM24 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 |
VMOVAPD %XMM9,-0xd0(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 |
VBROADCASTSD %XMM9,%YMM25 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 |
VBROADCASTSD %XMM7,%YMM26 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 |
VMOVAPD %XMM10,-0xc0(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 |
VBROADCASTSD %XMM10,%YMM27 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 |
VBROADCASTSD %XMM4,%YMM28 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 |
VBROADCASTSD %XMM11,%YMM29 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 |
VBROADCASTSD %XMM30,%YMM30 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 |
VBROADCASTSD %XMM31,%YMM31 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 |
VBROADCASTSD %XMM0,%YMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 |
VBROADCASTSD %XMM1,%YMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 |
MOV %R15D,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVAPD %XMM3,-0x110(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 |
VMOVAPD %XMM4,-0x100(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 |
AND $-0x8,%EBX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
VXORPD %XMM4,%XMM4,%XMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VXORPD %XMM3,%XMM3,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVSD %XMM14,-0x98(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 |
VMOVAPD %XMM15,-0x190(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 |
VMOVAPD %XMM16,-0x180(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 |
VMOVAPD %XMM20,-0x170(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 |
VMOVAPD %XMM5,-0x120(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 |
VMOVAPD %XMM6,-0xf0(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 |
VMOVAPD %XMM7,-0xe0(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 |
VMOVAPD %XMM11,-0xb0(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 |
VMOVAPD %XMM0,-0x140(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 |
VMOVAPD %XMM1,-0x130(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
SAL $0x6,%R14 | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VADDPD %YMM4,%YMM3,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 |
VMOVSD -0x98(%RBP),%XMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVAPD -0x190(%RBP),%XMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVAPD -0x180(%RBP),%XMM16 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVAPD -0x170(%RBP),%XMM20 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVAPD -0x160(%RBP),%XMM30 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVAPD -0x150(%RBP),%XMM31 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVAPD -0x140(%RBP),%XMM17 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVAPD -0x130(%RBP),%XMM18 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVAPD -0x120(%RBP),%XMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVAPD -0x110(%RBP),%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVAPD -0x100(%RBP),%XMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVAPD -0xf0(%RBP),%XMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVAPD -0xe0(%RBP),%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVAPD -0xd0(%RBP),%XMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVAPD -0xc0(%RBP),%XMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVAPD -0xb0(%RBP),%XMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
MOV -0x90(%RBP),%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV -0x88(%RBP),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV -0x80(%RBP),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV -0x78(%RBP),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV -0x70(%RBP),%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV -0x68(%RBP),%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV -0x2c(%RBP),%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV -0x38(%RBP),%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
VEXTRACTF128 $0x1,%YMM0,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
VADDPD %XMM1,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 |
VPERMILPD $0x1,%XMM0,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 |
VADDSD %XMM1,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 |
CMP %R15,%RBX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JE 23ef70 <_ZN16miniqmcreference17TwoBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE14evaluateRatiosERNS1_18VirtualParticleSetERSt6vectorIdSaIdEE+0x140> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
VUNPCKLPD %XMM4,%XMM3,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 |
VUNPCKLPD %XMM11,%XMM9,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 |
VUNPCKLPD %XMM10,%XMM6,%XMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 |
VUNPCKLPD %XMM7,%XMM5,%XMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 |
XCHG %AX,%AX | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
JMP 23ef70 <_ZN16miniqmcreference17TwoBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE14evaluateRatiosERNS1_18VirtualParticleSetERSt6vectorIdSaIdEE+0x140> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
Function | _ZN16miniqmcreference17TwoBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE14evaluateRatiosERNS1_18VirtualParticleSetERSt6vectorIdSaIdEE |
Source file and lines | BsplineFunctor.h:236-260 |
Module | exec |
nb instructions | 131 |
nb uops | 130 |
loop length | 694 |
used x86 registers | 14 |
used mmx registers | 0 |
used xmm registers | 19 |
used ymm registers | 21 |
used zmm registers | 0 |
nb stack references | 24 |
micro-operation queue | 21.67 cycles |
front end | 21.67 cycles |
ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 4.25 | 4.25 | 4.25 | 4.25 | 4.00 | 21.33 | 21.33 | 21.33 | 1.00 | 9.00 | 8.50 | 8.50 | 8.00 | 8.00 |
cycles | 4.25 | 4.25 | 4.25 | 4.25 | 4.00 | 21.33 | 21.33 | 21.33 | 1.00 | 9.00 | 8.50 | 8.50 | 8.00 | 8.00 |
Cycles executing div or sqrt instructions | NA |
Front-end | 21.67 |
Dispatch | 21.33 |
Overall L1 | 21.67 |
all | 0% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 0% |
all | 51% |
load | 48% |
store | 93% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 50% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 31% |
all | 41% |
load | 38% |
store | 83% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 40% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 25% |
all | 10% |
load | 10% |
store | 9% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 9% |
all | 19% |
load | 18% |
store | 24% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 25% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 17% |
all | 17% |
load | 16% |
store | 22% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 22% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 15% |
Instruction | Nb FU | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
VADDSD %XMM0,%XMM14,%XMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 |
CMP %RDI,%R14 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JE 23ee75 <_ZN16miniqmcreference17TwoBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE14evaluateRatiosERNS1_18VirtualParticleSetERSt6vectorIdSaIdEE+0x45> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV %R14,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %ECX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x4(%R13,%RAX,4),%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
INC %R14 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
VXORPD %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %ECX,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SUB %EDX,%EBX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JLE 23ef70 <_ZN16miniqmcreference17TwoBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE14evaluateRatiosERNS1_18VirtualParticleSetERSt6vectorIdSaIdEE+0x140> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
ADD %R9,%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOVSXD %EDX,%RDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %R12,%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R14,-0x38(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %EBX,%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %ECX,-0x2c(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
XOR %EBX,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV (%R10,%RAX,8),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
LEA (%R8,%RDX,8),%R11 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SUB %RDX,%R15 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMOVSD 0x8(%RAX),%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
JMP 23efc8 <_ZN16miniqmcreference17TwoBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE14evaluateRatiosERNS1_18VirtualParticleSetERSt6vectorIdSaIdEE+0x198> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
MOV -0x2c(%RBP),%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV -0x38(%RBP),%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
VXORPD %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
TEST %EBX,%EBX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JLE 23ef70 <_ZN16miniqmcreference17TwoBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE14evaluateRatiosERNS1_18VirtualParticleSetERSt6vectorIdSaIdEE+0x140> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
VMOVSD 0x238(%RAX),%XMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVUPD 0x18(%RAX),%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVSD 0x28(%RAX),%XMM16 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVSD 0x30(%RAX),%XMM20 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVSD 0x48(%RAX),%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVSD 0x68(%RAX),%XMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVSD 0x38(%RAX),%XMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVSD 0x40(%RAX),%XMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVSD 0x50(%RAX),%XMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVSD 0x58(%RAX),%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVSD 0x60(%RAX),%XMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVSD 0x70(%RAX),%XMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVSD 0x78(%RAX),%XMM30 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVSD 0x80(%RAX),%XMM31 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVSD 0x88(%RAX),%XMM17 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVSD 0x90(%RAX),%XMM18 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV 0x218(%RAX),%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %EBX,%R15D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP $0x8,%EBX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JAE 23f078 <_ZN16miniqmcreference17TwoBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE14evaluateRatiosERNS1_18VirtualParticleSetERSt6vectorIdSaIdEE+0x248> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
XOR %EBX,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
JMP 23f59c <_ZN16miniqmcreference17TwoBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE14evaluateRatiosERNS1_18VirtualParticleSetERSt6vectorIdSaIdEE+0x76c> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
VMOVAPD %XMM17,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVAPD %XMM18,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R15,%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SHR $0x3,%R14 | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVAPD %XMM30,-0x160(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 |
VMOVAPD %XMM31,-0x150(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 |
VBROADCASTSD %XMM15,%YMM17 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 |
VBROADCASTSD %XMM2,%YMM18 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 |
VPERMPD $0x55,%YMM2,%YMM19 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 4 | 0.50 |
VBROADCASTSD %XMM16,%YMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 |
VBROADCASTSD %XMM20,%YMM21 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 |
VBROADCASTSD %XMM5,%YMM22 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 |
VBROADCASTSD %XMM6,%YMM23 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 |
VBROADCASTSD %XMM3,%YMM24 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 |
VMOVAPD %XMM9,-0xd0(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 |
VBROADCASTSD %XMM9,%YMM25 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 |
VBROADCASTSD %XMM7,%YMM26 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 |
VMOVAPD %XMM10,-0xc0(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 |
VBROADCASTSD %XMM10,%YMM27 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 |
VBROADCASTSD %XMM4,%YMM28 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 |
VBROADCASTSD %XMM11,%YMM29 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 |
VBROADCASTSD %XMM30,%YMM30 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 |
VBROADCASTSD %XMM31,%YMM31 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 |
VBROADCASTSD %XMM0,%YMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 |
VBROADCASTSD %XMM1,%YMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 |
MOV %R15D,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVAPD %XMM3,-0x110(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 |
VMOVAPD %XMM4,-0x100(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 |
AND $-0x8,%EBX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
VXORPD %XMM4,%XMM4,%XMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VXORPD %XMM3,%XMM3,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVSD %XMM14,-0x98(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 |
VMOVAPD %XMM15,-0x190(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 |
VMOVAPD %XMM16,-0x180(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 |
VMOVAPD %XMM20,-0x170(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 |
VMOVAPD %XMM5,-0x120(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 |
VMOVAPD %XMM6,-0xf0(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 |
VMOVAPD %XMM7,-0xe0(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 |
VMOVAPD %XMM11,-0xb0(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 |
VMOVAPD %XMM0,-0x140(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 |
VMOVAPD %XMM1,-0x130(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
SAL $0x6,%R14 | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VADDPD %YMM4,%YMM3,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 |
VMOVSD -0x98(%RBP),%XMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVAPD -0x190(%RBP),%XMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVAPD -0x180(%RBP),%XMM16 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVAPD -0x170(%RBP),%XMM20 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVAPD -0x160(%RBP),%XMM30 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVAPD -0x150(%RBP),%XMM31 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVAPD -0x140(%RBP),%XMM17 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVAPD -0x130(%RBP),%XMM18 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVAPD -0x120(%RBP),%XMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVAPD -0x110(%RBP),%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVAPD -0x100(%RBP),%XMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVAPD -0xf0(%RBP),%XMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVAPD -0xe0(%RBP),%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVAPD -0xd0(%RBP),%XMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVAPD -0xc0(%RBP),%XMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVAPD -0xb0(%RBP),%XMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
MOV -0x90(%RBP),%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV -0x88(%RBP),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV -0x80(%RBP),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV -0x78(%RBP),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV -0x70(%RBP),%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV -0x68(%RBP),%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV -0x2c(%RBP),%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV -0x38(%RBP),%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
VEXTRACTF128 $0x1,%YMM0,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 |
VADDPD %XMM1,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 |
VPERMILPD $0x1,%XMM0,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 |
VADDSD %XMM1,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 |
CMP %R15,%RBX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JE 23ef70 <_ZN16miniqmcreference17TwoBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE14evaluateRatiosERNS1_18VirtualParticleSetERSt6vectorIdSaIdEE+0x140> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
VUNPCKLPD %XMM4,%XMM3,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 |
VUNPCKLPD %XMM11,%XMM9,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 |
VUNPCKLPD %XMM10,%XMM6,%XMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 |
VUNPCKLPD %XMM7,%XMM5,%XMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 |
XCHG %AX,%AX | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
JMP 23ef70 <_ZN16miniqmcreference17TwoBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE14evaluateRatiosERNS1_18VirtualParticleSetERSt6vectorIdSaIdEE+0x140> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |