300776f512
Signed-off-by: Rafael Ravedutti <rafaelravedutti@gmail.com>
85 lines
9.7 KiB
Plaintext
85 lines
9.7 KiB
Plaintext
Open Source Architecture Code Analyzer (OSACA) - 0.4.12
|
|
Analyzed file: gromacs-avx512-sp.s
|
|
Architecture: ICX
|
|
Timestamp: 2023-04-05 00:42:45
|
|
|
|
|
|
P - Throughput of LOAD operation can be hidden behind a past or future STORE instruction
|
|
* - Instruction micro-ops not bound to a port
|
|
X - No throughput/latency information for this instruction in data file
|
|
|
|
|
|
Combined Analysis Report
|
|
------------------------
|
|
Port pressure in cycles
|
|
| 0 - 0DV | 1 - 1DV | 2 - 2D | 3 - 3D | 4 | 5 | 6 | 7 | 8 | 9 || CP | LCD |
|
|
------------------------------------------------------------------------------------------------------------------------
|
|
1615 | | | | | | | | | | || | | # LLVM-MCA-BEGIN
|
|
1616 | | | | | | | | | | || | | .LBB2_11: #
|
|
1617 | | | | | | | | | | || | | # Parent Loop BB2_7 Depth=1
|
|
1618 | | | | | | | | | | || | | # => This Inner Loop Header: Depth=2
|
|
1619 | | | 0.50 0.50 | 0.50 0.50 | | | | | | || 5.0 | | movslq (%rsi,%rdx,4), %rdi
|
|
1620 | 0.00 | 0.75 | | | | 0.000 | 0.25 | | | || 6.0 | | leaq (%rdi,%rdi,2), %rdi
|
|
1621 | 0.00 | | | | | | 1.00 | | | || 1.0 | | shlq $5, %rdi
|
|
1622 | | | 0.50 0.50 | 0.50 0.50 | | | | | | || 5.0 | | vmovupd (%rcx,%rdi), %zmm16
|
|
1623 | | | 0.50 0.50 | 0.50 0.50 | | 1.000 | | | | || | | vinsertf64x4 $1, (%rcx,%rdi), %zmm16, %zmm17
|
|
1624 | | | | | | | | | | || | | X vbroadcastf64x4 64(%rcx,%rdi), %zmm18 # zmm18 = mem[0,1,2,3,0,1,2,3]
|
|
1625 | | | | | | | | | | || 0.0 | | X vshuff64x2 $238, %zmm16, %zmm16, %zmm16 # zmm16 = zmm16[4,5,6,7,4,5,6,7]
|
|
1626 | 0.50 | | | | | 0.500 | | | | || | | vsubps %zmm17, %zmm6, %zmm19
|
|
1627 | 0.50 | | | | | 0.500 | | | | || 4.0 | | vsubps %zmm16, %zmm10, %zmm20
|
|
1628 | 0.50 | | | | | 0.500 | | | | || | | vsubps %zmm18, %zmm12, %zmm21
|
|
1629 | 0.50 | | | | | 0.500 | | | | || | | vsubps %zmm17, %zmm9, %zmm17
|
|
1630 | 0.50 | | | | | 0.500 | | | | || | | vsubps %zmm18, %zmm14, %zmm18
|
|
1631 | 0.50 | | | | | 0.500 | | | | || | | vsubps %zmm16, %zmm11, %zmm16
|
|
1632 | 0.50 | | | | | 0.500 | | | | || | | vmulps %zmm21, %zmm21, %zmm22
|
|
1633 | 0.50 | | | | | 0.500 | | | | || 4.0 | | vfmadd231ps %zmm20, %zmm20, %zmm22 # zmm22 = (zmm20 * zmm20) + zmm22
|
|
1634 | 0.50 | | | | | 0.500 | | | | || 4.0 | | vfmadd231ps %zmm19, %zmm19, %zmm22 # zmm22 = (zmm19 * zmm19) + zmm22
|
|
1635 | 0.50 | | | | | 0.500 | | | | || | | vmulps %zmm18, %zmm18, %zmm23
|
|
1636 | 0.50 | | | | | 0.500 | | | | || | | vfmadd231ps %zmm16, %zmm16, %zmm23 # zmm23 = (zmm16 * zmm16) + zmm23
|
|
1637 | 0.50 | | | | | 0.500 | | | | || | | vfmadd231ps %zmm17, %zmm17, %zmm23 # zmm23 = (zmm17 * zmm17) + zmm23
|
|
1638 | 2.50 | | | | | 0.500 | | | | || 6.0 | | vrcp14ps %zmm22, %zmm24
|
|
1639 | 2.50 | | | | | 0.500 | | | | || | | vrcp14ps %zmm23, %zmm25
|
|
1640 | | | | | | | | | | || | | X vcmpltps %zmm0, %zmm22, %k2
|
|
1641 | | | | | | | | | | || | | X vcmpltps %zmm0, %zmm23, %k1
|
|
1642 | 0.50 | | | | | 0.500 | | | | || 4.0 | | vmulps %zmm29, %zmm24, %zmm22
|
|
1643 | 0.50 | | | | | 0.500 | | | | || | | vmulps %zmm24, %zmm24, %zmm23
|
|
1644 | 0.50 | | | | | 0.500 | | | | || | | vmulps %zmm29, %zmm25, %zmm26
|
|
1645 | 0.50 | | | | | 0.500 | | | | || 4.0 | | vmulps %zmm22, %zmm23, %zmm22
|
|
1646 | 0.50 | | | | | 0.500 | | | | || | | vmulps %zmm25, %zmm25, %zmm23
|
|
1647 | 0.50 | | | | | 0.500 | | | | || | | vmulps %zmm26, %zmm23, %zmm23
|
|
1648 | 0.50 | | | | | 0.500 | | | | || 4.0 | | vaddps %zmm2, %zmm22, %zmm26
|
|
1649 | 0.50 | | | | | 0.500 | | | | || | | vmulps %zmm24, %zmm1, %zmm24
|
|
1650 | 0.50 | | | | | 0.500 | | | | || | | vmulps %zmm22, %zmm24, %zmm22
|
|
1651 | 0.50 | | | | | 0.500 | | | | || 4.0 | | vmulps %zmm26, %zmm22, %zmm22
|
|
1652 | 0.50 | | | | | 0.500 | | | | || | | vaddps %zmm2, %zmm23, %zmm24
|
|
1653 | 0.50 | | | | | 0.500 | | | | || | | vmulps %zmm25, %zmm1, %zmm25
|
|
1654 | 0.50 | | | | | 0.500 | | | | || | | vmulps %zmm23, %zmm25, %zmm23
|
|
1655 | 0.50 | | | | | 0.500 | | | | || | | vmulps %zmm24, %zmm23, %zmm23
|
|
1656 | 0.50 | | | | | 0.500 | | | | || 4.0 | | vfmadd231ps %zmm19, %zmm22, %zmm13 {%k2} # zmm13 = (zmm22 * zmm19) + zmm13
|
|
1657 | 0.50 | | | | | 0.500 | | | | || | | vfmadd231ps %zmm20, %zmm22, %zmm8 {%k2} # zmm8 = (zmm22 * zmm20) + zmm8
|
|
1658 | 0.25 | | | | | 0.750 | | | | || | | vfmadd231ps %zmm21, %zmm22, %zmm5 {%k2} # zmm5 = (zmm22 * zmm21) + zmm5
|
|
1659 | 0.00 | | | | | 1.000 | | | | || | | vfmadd231ps %zmm17, %zmm23, %zmm15 {%k1} # zmm15 = (zmm23 * zmm17) + zmm15
|
|
1660 | 0.00 | | | | | 1.000 | | | | || | | vfmadd231ps %zmm16, %zmm23, %zmm7 {%k1} # zmm7 = (zmm23 * zmm16) + zmm7
|
|
1661 | 0.24 | | | | | 0.760 | | | | || | 4.0 | vfmadd231ps %zmm18, %zmm23, %zmm4 {%k1} # zmm4 = (zmm23 * zmm18) + zmm4
|
|
1662 | 0.00 | 0.50 | | | | -0.01 | 0.50 | | | || | | incq %rdx
|
|
1663 | 0.00 | 0.75 | | | | -0.01 | 0.25 | | | || | | cmpq %rdx, %r12
|
|
1664 | | | | | | | | | | || | | * jne .LBB2_11
|
|
1665 | | | | | | | | | | || | | * jmp .LBB2_12
|
|
1666 | | | | | | | | | | || | | # LLVM-MCA-END
|
|
|
|
19.5 2.00 1.50 1.50 1.50 1.50 19.49 2.00 55.0 4.0
|
|
|
|
|
|
|
|
|
|
Loop-Carried Dependencies Analysis Report
|
|
-----------------------------------------
|
|
1661 | 4.0 | vfmadd231ps %zmm18, %zmm23, %zmm4 {%k1} # zmm4 = (zmm23 * zmm18) + zmm4| [1661]
|
|
1660 | 4.0 | vfmadd231ps %zmm16, %zmm23, %zmm7 {%k1} # zmm7 = (zmm23 * zmm16) + zmm7| [1660]
|
|
1659 | 4.0 | vfmadd231ps %zmm17, %zmm23, %zmm15 {%k1} # zmm15 = (zmm23 * zmm17) + zmm15| [1659]
|
|
1658 | 4.0 | vfmadd231ps %zmm21, %zmm22, %zmm5 {%k2} # zmm5 = (zmm22 * zmm21) + zmm5| [1658]
|
|
1657 | 4.0 | vfmadd231ps %zmm20, %zmm22, %zmm8 {%k2} # zmm8 = (zmm22 * zmm20) + zmm8| [1657]
|
|
1656 | 4.0 | vfmadd231ps %zmm19, %zmm22, %zmm13 {%k2} # zmm13 = (zmm22 * zmm19) + zmm13| [1656]
|
|
1662 | 1.0 | incq %rdx | [1662]
|
|
|