MD-Bench/static_analysis/rafael/analyses/gromacs-icx-avx512-sp-osaca-csx.out

85 lines
8.5 KiB
Plaintext
Raw Normal View History

Open Source Architecture Code Analyzer (OSACA) - 0.4.12
Analyzed file: gromacs-avx512-sp.s
Architecture: CSX
Timestamp: 2023-04-05 00:42:20
P - Throughput of LOAD operation can be hidden behind a past or future STORE instruction
* - Instruction micro-ops not bound to a port
X - No throughput/latency information for this instruction in data file
Combined Analysis Report
------------------------
Port pressure in cycles
| 0 - 0DV | 1 | 2 - 2D | 3 - 3D | 4 | 5 | 6 | 7 || CP | LCD |
--------------------------------------------------------------------------------------------------
1615 | | | | | | | | || | | # LLVM-MCA-BEGIN
1616 | | | | | | | | || | | .LBB2_11: #
1617 | | | | | | | | || | | # Parent Loop BB2_7 Depth=1
1618 | | | | | | | | || | | # => This Inner Loop Header: Depth=2
1619 | | | 0.50 0.50 | 0.50 0.50 | | | | || 4.0 | | movslq (%rsi,%rdx,4), %rdi
1620 | | 1.00 | | | | 0.00 | | || 1.0 | | leaq (%rdi,%rdi,2), %rdi
1621 | 0.00 | | | | | | 1.00 | || 1.0 | | shlq $5, %rdi
1622 | | | 0.50 0.50 | 0.50 0.50 | | | | || 4.0 | | vmovupd (%rcx,%rdi), %zmm16
1623 | 0.50 | | 0.50 0.50 | 0.50 0.50 | | 0.50 | | || | | vinsertf64x4 $1, (%rcx,%rdi), %zmm16, %zmm17
1624 | | | 0.50 0.50 | 0.50 0.50 | | | | || | | vbroadcastf64x4 64(%rcx,%rdi), %zmm18 # zmm18 = mem[0,1,2,3,0,1,2,3]
1625 | | | | | | 1.00 | | || 3.0 | | vshuff64x2 $238, %zmm16, %zmm16, %zmm16 # zmm16 = zmm16[4,5,6,7,4,5,6,7]
1626 | 0.50 | | | | | 0.50 | | || | | vsubps %zmm17, %zmm6, %zmm19
1627 | 0.50 | | | | | 0.50 | | || 4.0 | | vsubps %zmm16, %zmm10, %zmm20
1628 | 0.50 | | | | | 0.50 | | || | | vsubps %zmm18, %zmm12, %zmm21
1629 | 0.50 | | | | | 0.50 | | || | | vsubps %zmm17, %zmm9, %zmm17
1630 | 0.50 | | | | | 0.50 | | || | | vsubps %zmm18, %zmm14, %zmm18
1631 | 0.50 | | | | | 0.50 | | || | | vsubps %zmm16, %zmm11, %zmm16
1632 | 0.50 | | | | | 0.50 | | || | | vmulps %zmm21, %zmm21, %zmm22
1633 | 0.50 | | | | | 0.50 | | || 4.0 | | vfmadd231ps %zmm20, %zmm20, %zmm22 # zmm22 = (zmm20 * zmm20) + zmm22
1634 | 0.50 | | | | | 0.50 | | || 4.0 | | vfmadd231ps %zmm19, %zmm19, %zmm22 # zmm22 = (zmm19 * zmm19) + zmm22
1635 | 0.50 | | | | | 0.50 | | || | | vmulps %zmm18, %zmm18, %zmm23
1636 | 0.50 | | | | | 0.50 | | || | | vfmadd231ps %zmm16, %zmm16, %zmm23 # zmm23 = (zmm16 * zmm16) + zmm23
1637 | 0.50 | | | | | 0.50 | | || | | vfmadd231ps %zmm17, %zmm17, %zmm23 # zmm23 = (zmm17 * zmm17) + zmm23
1638 | 2.50 | | | | | 0.50 | | || 8.0 | | vrcp14ps %zmm22, %zmm24
1639 | 2.50 | | | | | 0.50 | | || | | vrcp14ps %zmm23, %zmm25
1640 | | | | | | | | || | | X vcmpltps %zmm0, %zmm22, %k2
1641 | | | | | | | | || | | X vcmpltps %zmm0, %zmm23, %k1
1642 | 0.50 | | | | | 0.50 | | || 4.0 | | vmulps %zmm29, %zmm24, %zmm22
1643 | 0.50 | | | | | 0.50 | | || | | vmulps %zmm24, %zmm24, %zmm23
1644 | 0.50 | | | | | 0.50 | | || | | vmulps %zmm29, %zmm25, %zmm26
1645 | 0.50 | | | | | 0.50 | | || 4.0 | | vmulps %zmm22, %zmm23, %zmm22
1646 | 0.50 | | | | | 0.50 | | || | | vmulps %zmm25, %zmm25, %zmm23
1647 | 0.50 | | | | | 0.50 | | || | | vmulps %zmm26, %zmm23, %zmm23
1648 | 0.50 | | | | | 0.50 | | || 4.0 | | vaddps %zmm2, %zmm22, %zmm26
1649 | 0.50 | | | | | 0.50 | | || | | vmulps %zmm24, %zmm1, %zmm24
1650 | 0.50 | | | | | 0.50 | | || | | vmulps %zmm22, %zmm24, %zmm22
1651 | 0.50 | | | | | 0.50 | | || 4.0 | | vmulps %zmm26, %zmm22, %zmm22
1652 | 0.50 | | | | | 0.50 | | || | | vaddps %zmm2, %zmm23, %zmm24
1653 | 0.50 | | | | | 0.50 | | || | | vmulps %zmm25, %zmm1, %zmm25
1654 | 0.50 | | | | | 0.50 | | || | | vmulps %zmm23, %zmm25, %zmm23
1655 | 0.50 | | | | | 0.50 | | || | | vmulps %zmm24, %zmm23, %zmm23
1656 | 0.50 | | | | | 0.50 | | || 4.0 | | vfmadd231ps %zmm19, %zmm22, %zmm13 {%k2} # zmm13 = (zmm22 * zmm19) + zmm13
1657 | 0.50 | | | | | 0.50 | | || | | vfmadd231ps %zmm20, %zmm22, %zmm8 {%k2} # zmm8 = (zmm22 * zmm20) + zmm8
1658 | 0.50 | | | | | 0.50 | | || | | vfmadd231ps %zmm21, %zmm22, %zmm5 {%k2} # zmm5 = (zmm22 * zmm21) + zmm5
1659 | 0.00 | | | | | 1.00 | | || | | vfmadd231ps %zmm17, %zmm23, %zmm15 {%k1} # zmm15 = (zmm23 * zmm17) + zmm15
1660 | 0.00 | | | | | 1.00 | | || | | vfmadd231ps %zmm16, %zmm23, %zmm7 {%k1} # zmm7 = (zmm23 * zmm16) + zmm7
1661 | 0.00 | | | | | 1.00 | | || | 4.0 | vfmadd231ps %zmm18, %zmm23, %zmm4 {%k1} # zmm4 = (zmm23 * zmm18) + zmm4
1662 | 0.00 | 0.50 | | | | 0.00 | 0.50 | || | | incq %rdx
1663 | 0.00 | 1.00 | | | | 0.00 | 0.00 | || | | cmpq %rdx, %r12
1664 | | | | | | | | || | | * jne .LBB2_11
1665 | 0.00 | | | | | | 1.00 | || | | jmp .LBB2_12
1666 | | | | | | | | || | | # LLVM-MCA-END
     | 20.0 | 2.50 | 2.00 2.00 | 2.00 2.00 | | 20.0 | 2.50 | || 53.0 | 4.0 |
Loop-Carried Dependencies Analysis Report
-----------------------------------------
1661 | 4.0 | vfmadd231ps %zmm18, %zmm23, %zmm4 {%k1} # zmm4 = (zmm23 * zmm18) + zmm4| [1661]
1660 | 4.0 | vfmadd231ps %zmm16, %zmm23, %zmm7 {%k1} # zmm7 = (zmm23 * zmm16) + zmm7| [1660]
1659 | 4.0 | vfmadd231ps %zmm17, %zmm23, %zmm15 {%k1} # zmm15 = (zmm23 * zmm17) + zmm15| [1659]
1658 | 4.0 | vfmadd231ps %zmm21, %zmm22, %zmm5 {%k2} # zmm5 = (zmm22 * zmm21) + zmm5| [1658]
1657 | 4.0 | vfmadd231ps %zmm20, %zmm22, %zmm8 {%k2} # zmm8 = (zmm22 * zmm20) + zmm8| [1657]
1656 | 4.0 | vfmadd231ps %zmm19, %zmm22, %zmm13 {%k2} # zmm13 = (zmm22 * zmm19) + zmm13| [1656]
1662 | 1.0 | incq %rdx | [1662]