Add analysis files from gromacs-avx512-dp with ICX compiler
Signed-off-by: Rafael Ravedutti <rafaelravedutti@gmail.com>
This commit is contained in:
		@@ -0,0 +1,148 @@
 | 
				
			|||||||
 | 
					Intel(R) Architecture Code Analyzer Version -  v3.0-28-g1ba2cbb build date: 2017-10-23;16:42:45
 | 
				
			||||||
 | 
					Analyzed File -  gromacs-avx512-dp-ICX.o
 | 
				
			||||||
 | 
					Binary Format - 64Bit
 | 
				
			||||||
 | 
					Architecture  -  SKX
 | 
				
			||||||
 | 
					Analysis Type - Throughput
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Throughput Analysis Report
 | 
				
			||||||
 | 
					--------------------------
 | 
				
			||||||
 | 
					Block Throughput: 47.68 Cycles       Throughput Bottleneck: Backend
 | 
				
			||||||
 | 
					Loop Count:  22
 | 
				
			||||||
 | 
					Port Binding In Cycles Per Iteration:
 | 
				
			||||||
 | 
					--------------------------------------------------------------------------------------------------
 | 
				
			||||||
 | 
					|  Port  |   0   -  DV   |   1   |   2   -  D    |   3   -  D    |   4   |   5   |   6   |   7   |
 | 
				
			||||||
 | 
					--------------------------------------------------------------------------------------------------
 | 
				
			||||||
 | 
					| Cycles | 42.0     0.0  | 12.5  |  5.0     5.0  |  5.0     5.0  |  0.0  | 42.0  | 12.5  |  0.0  |
 | 
				
			||||||
 | 
					--------------------------------------------------------------------------------------------------
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					DV - Divider pipe (on port 0)
 | 
				
			||||||
 | 
					D - Data fetch pipe (on ports 2 and 3)
 | 
				
			||||||
 | 
					F - Macro Fusion with the previous instruction occurred
 | 
				
			||||||
 | 
					* - instruction micro-ops not bound to a port
 | 
				
			||||||
 | 
					^ - Micro Fusion occurred
 | 
				
			||||||
 | 
					# - ESP Tracking sync uop was issued
 | 
				
			||||||
 | 
					@ - SSE instruction followed an AVX256/AVX512 instruction, dozens of cycles penalty is expected
 | 
				
			||||||
 | 
					X - instruction not supported, was not accounted in Analysis
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					| Num Of   |                    Ports pressure in cycles                         |      |
 | 
				
			||||||
 | 
					|  Uops    |  0  - DV    |  1   |  2  -  D    |  3  -  D    |  4   |  5   |  6   |  7   |
 | 
				
			||||||
 | 
					-----------------------------------------------------------------------------------------
 | 
				
			||||||
 | 
					|   1      |             |      | 1.0     1.0 |             |      |      |      |      | movsxd rbx, dword ptr [r12+r14*4]
 | 
				
			||||||
 | 
					|   1      |             | 1.0  |             |             |      |      |      |      | lea rcx, ptr [rbx+rbx*2]
 | 
				
			||||||
 | 
					|   1      |             |      |             |             |      |      | 1.0  |      | shl rcx, 0x6
 | 
				
			||||||
 | 
					|   1      |             |      |             | 1.0     1.0 |      |      |      |      | vmovapd zmm29, zmmword ptr [rsi+rcx*1]
 | 
				
			||||||
 | 
					|   1      |             |      | 1.0     1.0 |             |      |      |      |      | vmovapd zmm30, zmmword ptr [rsi+rcx*1+0x40]
 | 
				
			||||||
 | 
					|   1      |             |      |             | 1.0     1.0 |      |      |      |      | vmovapd zmm31, zmmword ptr [rsi+rcx*1+0x80]
 | 
				
			||||||
 | 
					|   1      |             |      | 1.0     1.0 |             |      |      |      |      | vmovupd zmm3, zmmword ptr [rsp+0x40]
 | 
				
			||||||
 | 
					|   1      |             |      |             |             |      | 1.0  |      |      | vsubpd zmm4, zmm3, zmm29
 | 
				
			||||||
 | 
					|   1      |             |      |             | 1.0     1.0 |      |      |      |      | vmovupd zmm3, zmmword ptr [rsp+0x140]
 | 
				
			||||||
 | 
					|   1      | 1.0         |      |             |             |      |      |      |      | vsubpd zmm3, zmm3, zmm30
 | 
				
			||||||
 | 
					|   1      |             | 1.0  |             |             |      |      |      |      | lea ecx, ptr [rbx+rbx*1]
 | 
				
			||||||
 | 
					|   1      |             |      |             |             |      |      | 1.0  |      | cmp rdi, rcx
 | 
				
			||||||
 | 
					|   1      |             |      |             |             |      |      | 1.0  |      | setnz dl
 | 
				
			||||||
 | 
					|   1      |             |      |             |             |      |      | 1.0  |      | setz cl
 | 
				
			||||||
 | 
					|   1      |             | 1.0  |             |             |      |      |      |      | lea ebx, ptr [rbx+rbx*1+0x1]
 | 
				
			||||||
 | 
					|   1      |             |      |             |             |      | 1.0  |      |      | vsubpd zmm17, zmm25, zmm31
 | 
				
			||||||
 | 
					|   1      | 1.0         |      |             |             |      |      |      |      | vmulpd zmm18, zmm17, zmm17
 | 
				
			||||||
 | 
					|   1      |             |      |             |             |      | 1.0  |      |      | vfmadd231pd zmm18, zmm3, zmm3
 | 
				
			||||||
 | 
					|   1      | 1.0         |      |             |             |      |      |      |      | vfmadd231pd zmm18, zmm4, zmm4
 | 
				
			||||||
 | 
					|   3      | 2.0         |      |             |             |      | 1.0  |      |      | vrcp14pd zmm19, zmm18
 | 
				
			||||||
 | 
					|   1      |             | 1.0  |             |             |      |      |      |      | cmp rdi, rbx
 | 
				
			||||||
 | 
					|   1      |             |      |             |             |      |      | 1.0  |      | setz bl
 | 
				
			||||||
 | 
					|   1*     |             |      |             |             |      |      |      |      | mov ebp, ebx
 | 
				
			||||||
 | 
					|   1      |             |      |             |             |      | 1.0  |      |      | vmulpd zmm20, zmm19, zmm22
 | 
				
			||||||
 | 
					|   1      |             |      |             |             |      | 1.0  |      |      | vmulpd zmm21, zmm19, zmm19
 | 
				
			||||||
 | 
					|   1      | 1.0         |      |             |             |      |      |      |      | vmulpd zmm20, zmm21, zmm20
 | 
				
			||||||
 | 
					|   1      |             |      | 1.0     1.0 |             |      |      |      |      | vmovupd zmm21, zmmword ptr [rsp+0x80]
 | 
				
			||||||
 | 
					|   1      |             |      |             |             |      | 1.0  |      |      | vsubpd zmm21, zmm21, zmm29
 | 
				
			||||||
 | 
					|   1      |             |      |             |             |      |      | 1.0  |      | shl bpl, 0x4
 | 
				
			||||||
 | 
					|   1      | 1.0         |      |             |             |      |      |      |      | vmulpd zmm19, zmm1, zmm19
 | 
				
			||||||
 | 
					|   1      |             |      |             |             |      | 1.0  |      |      | vmulpd zmm19, zmm19, zmm20
 | 
				
			||||||
 | 
					|   1      | 1.0         |      |             |             |      |      |      |      | vaddpd zmm20, zmm20, zmm2
 | 
				
			||||||
 | 
					|   1      |             |      |             |             |      | 1.0  |      |      | vmulpd zmm19, zmm19, zmm20
 | 
				
			||||||
 | 
					|   1      |             |      |             | 1.0     1.0 |      |      |      |      | vmovupd zmm20, zmmword ptr [rsp+0x100]
 | 
				
			||||||
 | 
					|   1      | 1.0         |      |             |             |      |      |      |      | vsubpd zmm20, zmm20, zmm30
 | 
				
			||||||
 | 
					|   1      |             | 1.0  |             |             |      |      |      |      | not bpl
 | 
				
			||||||
 | 
					|   1      |             | 1.0  |             |             |      |      |      |      | sub bpl, cl
 | 
				
			||||||
 | 
					|   1      |             |      |             |             |      | 1.0  |      |      | kmovd k1, ebp
 | 
				
			||||||
 | 
					|   1      |             |      |             |             |      | 1.0  |      |      | vcmppd k1{k1}, zmm18, zmm0, 0x1
 | 
				
			||||||
 | 
					|   1      | 1.0         |      |             |             |      |      |      |      | vsubpd zmm18, zmm26, zmm31
 | 
				
			||||||
 | 
					|   1      | 1.0         |      |             |             |      |      |      |      | vfmadd231pd zmm15{k1}, zmm19, zmm4
 | 
				
			||||||
 | 
					|   1      |             |      |             |             |      | 1.0  |      |      | vmulpd zmm4, zmm18, zmm18
 | 
				
			||||||
 | 
					|   1      | 1.0         |      |             |             |      |      |      |      | vfmadd231pd zmm4, zmm20, zmm20
 | 
				
			||||||
 | 
					|   1      |             |      |             |             |      | 1.0  |      |      | vfmadd231pd zmm4, zmm21, zmm21
 | 
				
			||||||
 | 
					|   1      | 1.0         |      |             |             |      |      |      |      | vfmadd231pd zmm12{k1}, zmm19, zmm3
 | 
				
			||||||
 | 
					|   3      | 2.0         |      |             |             |      | 1.0  |      |      | vrcp14pd zmm3, zmm4
 | 
				
			||||||
 | 
					|   1      |             | 1.0  |             |             |      |      |      |      | lea ecx, ptr [rdx+rdx*1]
 | 
				
			||||||
 | 
					|   1*     |             |      |             |             |      |      |      |      | mov eax, ebx
 | 
				
			||||||
 | 
					|   1      |             |      |             |             |      | 1.0  |      |      | vfmadd231pd zmm8{k1}, zmm19, zmm17
 | 
				
			||||||
 | 
					|   1      |             |      |             |             |      | 1.0  |      |      | vmulpd zmm17, zmm3, zmm22
 | 
				
			||||||
 | 
					|   1      | 1.0         |      |             |             |      |      |      |      | vmulpd zmm19, zmm3, zmm3
 | 
				
			||||||
 | 
					|   1      |             |      |             |             |      | 1.0  |      |      | vmulpd zmm17, zmm19, zmm17
 | 
				
			||||||
 | 
					|   1      |             |      | 1.0     1.0 |             |      |      |      |      | vmovupd zmm19, zmmword ptr [rsp+0x1c0]
 | 
				
			||||||
 | 
					|   1      | 1.0         |      |             |             |      |      |      |      | vsubpd zmm19, zmm19, zmm29
 | 
				
			||||||
 | 
					|   1      |             |      |             |             |      |      | 1.0  |      | shl al, 0x5
 | 
				
			||||||
 | 
					|   1      |             |      |             |             |      | 1.0  |      |      | vmulpd zmm3, zmm1, zmm3
 | 
				
			||||||
 | 
					|   1      | 1.0         |      |             |             |      |      |      |      | vmulpd zmm3, zmm3, zmm17
 | 
				
			||||||
 | 
					|   1      |             |      |             |             |      | 1.0  |      |      | vaddpd zmm17, zmm17, zmm2
 | 
				
			||||||
 | 
					|   1      | 1.0         |      |             |             |      |      |      |      | vmulpd zmm3, zmm3, zmm17
 | 
				
			||||||
 | 
					|   1      |             |      |             |             |      | 1.0  |      |      | vsubpd zmm17, zmm23, zmm30
 | 
				
			||||||
 | 
					|   1      |             | 0.5  |             |             |      |      | 0.5  |      | sub cl, al
 | 
				
			||||||
 | 
					|   1      |             | 0.5  |             |             |      |      | 0.5  |      | add cl, 0xfd
 | 
				
			||||||
 | 
					|   1      |             |      |             |             |      | 1.0  |      |      | kmovd k1, ecx
 | 
				
			||||||
 | 
					|   1      |             |      |             |             |      | 1.0  |      |      | vcmppd k1{k1}, zmm4, zmm0, 0x1
 | 
				
			||||||
 | 
					|   1      | 1.0         |      |             |             |      |      |      |      | vsubpd zmm4, zmm27, zmm31
 | 
				
			||||||
 | 
					|   1      | 1.0         |      |             |             |      |      |      |      | vfmadd231pd zmm14{k1}, zmm3, zmm21
 | 
				
			||||||
 | 
					|   1      | 1.0         |      |             |             |      |      |      |      | vmulpd zmm21, zmm4, zmm4
 | 
				
			||||||
 | 
					|   1      |             |      |             |             |      | 1.0  |      |      | vfmadd231pd zmm21, zmm17, zmm17
 | 
				
			||||||
 | 
					|   1      | 1.0         |      |             |             |      |      |      |      | vfmadd231pd zmm21, zmm19, zmm19
 | 
				
			||||||
 | 
					|   1      |             |      |             |             |      | 1.0  |      |      | vfmadd231pd zmm10{k1}, zmm3, zmm20
 | 
				
			||||||
 | 
					|   3      | 2.0         |      |             |             |      | 1.0  |      |      | vrcp14pd zmm20, zmm21
 | 
				
			||||||
 | 
					|   1      |             |      |             |             |      | 1.0  |      |      | vfmadd231pd zmm6{k1}, zmm3, zmm18
 | 
				
			||||||
 | 
					|   1      | 1.0         |      |             |             |      |      |      |      | vmulpd zmm3, zmm20, zmm22
 | 
				
			||||||
 | 
					|   1      |             |      |             |             |      | 1.0  |      |      | vmulpd zmm18, zmm20, zmm20
 | 
				
			||||||
 | 
					|   1      | 1.0         |      |             |             |      |      |      |      | vmulpd zmm3, zmm18, zmm3
 | 
				
			||||||
 | 
					|   1      |             |      |             |             |      | 1.0  |      |      | vmulpd zmm18, zmm1, zmm20
 | 
				
			||||||
 | 
					|   1      | 1.0         |      |             |             |      |      |      |      | vmulpd zmm18, zmm18, zmm3
 | 
				
			||||||
 | 
					|   1      |             |      |             |             |      | 1.0  |      |      | vaddpd zmm3, zmm3, zmm2
 | 
				
			||||||
 | 
					|   1      | 1.0         |      |             |             |      |      |      |      | vmulpd zmm3, zmm18, zmm3
 | 
				
			||||||
 | 
					|   1      |             | 1.0  |             |             |      |      |      |      | lea eax, ptr [rdx*4]
 | 
				
			||||||
 | 
					|   1*     |             |      |             |             |      |      |      |      | mov ecx, ebx
 | 
				
			||||||
 | 
					|   1      |             |      |             |             |      |      | 1.0  |      | shl cl, 0x6
 | 
				
			||||||
 | 
					|   1      |             | 0.5  |             |             |      |      | 0.5  |      | sub al, cl
 | 
				
			||||||
 | 
					|   1      |             | 0.5  |             |             |      |      | 0.5  |      | add al, 0xfb
 | 
				
			||||||
 | 
					|   1      |             |      |             |             |      | 1.0  |      |      | kmovd k1, eax
 | 
				
			||||||
 | 
					|   1      |             |      |             |             |      | 1.0  |      |      | vcmppd k1{k1}, zmm21, zmm0, 0x1
 | 
				
			||||||
 | 
					|   1      |             |      |             | 1.0     1.0 |      |      |      |      | vmovupd zmm18, zmmword ptr [rsp+0x180]
 | 
				
			||||||
 | 
					|   1      | 1.0         |      |             |             |      |      |      |      | vsubpd zmm18, zmm18, zmm29
 | 
				
			||||||
 | 
					|   1      | 1.0         |      |             |             |      |      |      |      | vsubpd zmm20, zmm24, zmm30
 | 
				
			||||||
 | 
					|   1      |             |      |             |             |      | 1.0  |      |      | vsubpd zmm21, zmm28, zmm31
 | 
				
			||||||
 | 
					|   1      | 1.0         |      |             |             |      |      |      |      | vfmadd231pd zmm16{k1}, zmm3, zmm19
 | 
				
			||||||
 | 
					|   1      |             |      |             |             |      | 1.0  |      |      | vmulpd zmm19, zmm21, zmm21
 | 
				
			||||||
 | 
					|   1      | 1.0         |      |             |             |      |      |      |      | vfmadd231pd zmm19, zmm20, zmm20
 | 
				
			||||||
 | 
					|   1      |             |      |             |             |      | 1.0  |      |      | vfmadd231pd zmm19, zmm18, zmm18
 | 
				
			||||||
 | 
					|   1      | 1.0         |      |             |             |      |      |      |      | vfmadd231pd zmm11{k1}, zmm3, zmm17
 | 
				
			||||||
 | 
					|   3      | 2.0         |      |             |             |      | 1.0  |      |      | vrcp14pd zmm17, zmm19
 | 
				
			||||||
 | 
					|   1      |             |      |             |             |      | 1.0  |      |      | vfmadd231pd zmm7{k1}, zmm3, zmm4
 | 
				
			||||||
 | 
					|   1      |             |      |             |             |      | 1.0  |      |      | vmulpd zmm3, zmm17, zmm22
 | 
				
			||||||
 | 
					|   1      | 1.0         |      |             |             |      |      |      |      | vmulpd zmm4, zmm17, zmm17
 | 
				
			||||||
 | 
					|   1      |             |      |             |             |      | 1.0  |      |      | vmulpd zmm3, zmm4, zmm3
 | 
				
			||||||
 | 
					|   1      | 1.0         |      |             |             |      |      |      |      | vmulpd zmm4, zmm1, zmm17
 | 
				
			||||||
 | 
					|   1      |             |      |             |             |      | 1.0  |      |      | vmulpd zmm4, zmm4, zmm3
 | 
				
			||||||
 | 
					|   1      | 1.0         |      |             |             |      |      |      |      | vaddpd zmm3, zmm3, zmm2
 | 
				
			||||||
 | 
					|   1      |             |      |             |             |      | 1.0  |      |      | vmulpd zmm3, zmm4, zmm3
 | 
				
			||||||
 | 
					|   1      |             |      |             |             |      |      | 1.0  |      | shl dl, 0x3
 | 
				
			||||||
 | 
					|   1      |             |      |             |             |      |      | 1.0  |      | shl bl, 0x7
 | 
				
			||||||
 | 
					|   1      |             | 1.0  |             |             |      |      |      |      | sub dl, bl
 | 
				
			||||||
 | 
					|   1      |             | 1.0  |             |             |      |      |      |      | add dl, 0xf7
 | 
				
			||||||
 | 
					|   1      |             |      |             |             |      | 1.0  |      |      | kmovd k1, edx
 | 
				
			||||||
 | 
					|   1      |             |      |             |             |      | 1.0  |      |      | vcmppd k1{k1}, zmm19, zmm0, 0x1
 | 
				
			||||||
 | 
					|   1      | 1.0         |      |             |             |      |      |      |      | vfmadd231pd zmm13{k1}, zmm3, zmm18
 | 
				
			||||||
 | 
					|   1      | 1.0         |      |             |             |      |      |      |      | vfmadd231pd zmm9{k1}, zmm3, zmm20
 | 
				
			||||||
 | 
					|   1      | 1.0         |      |             |             |      |      |      |      | vfmadd231pd zmm5{k1}, zmm3, zmm21
 | 
				
			||||||
 | 
					|   1      |             | 0.5  |             |             |      |      | 0.5  |      | inc r14
 | 
				
			||||||
 | 
					|   1*     |             |      |             |             |      |      |      |      | cmp r11, r14
 | 
				
			||||||
 | 
					|   0*F    |             |      |             |             |      |      |      |      | jnz 0xfffffffffffffd99
 | 
				
			||||||
 | 
					Total Num Of Uops: 123
 | 
				
			||||||
 | 
					Analysis Notes:
 | 
				
			||||||
 | 
					Backend allocation was stalled due to unavailable allocation resources.
 | 
				
			||||||
 
 | 
				
			|||||||
							
								
								
									
										159
									
								
								static_analysis/gromacs-avx512-dp-ICX-osaca.txt
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										159
									
								
								static_analysis/gromacs-avx512-dp-ICX-osaca.txt
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,159 @@
 | 
				
			|||||||
 | 
					Open Source Architecture Code Analyzer (OSACA) - 0.4.12
 | 
				
			||||||
 | 
					Analyzed file:      gromacs-avx512-dp-ICX.s
 | 
				
			||||||
 | 
					Architecture:       CSX
 | 
				
			||||||
 | 
					Timestamp:          2023-01-03 00:07:20
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					 P - Throughput of LOAD operation can be hidden behind a past or future STORE instruction
 | 
				
			||||||
 | 
					 * - Instruction micro-ops not bound to a port
 | 
				
			||||||
 | 
					 X - No throughput/latency information for this instruction in data file
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Combined Analysis Report
 | 
				
			||||||
 | 
					------------------------
 | 
				
			||||||
 | 
					                                      Port pressure in cycles                                       
 | 
				
			||||||
 | 
					     |  0   - 0DV  |  1   |  2   -  2D  |  3   -  3D  |  4   |  5   |  6   |  7   ||  CP  | LCD  |
 | 
				
			||||||
 | 
					--------------------------------------------------------------------------------------------------
 | 
				
			||||||
 | 
					2287 |             |      |             |             |      |      |      |      ||      |      |   .LBB5_11:                               #
 | 
				
			||||||
 | 
					2288 |             |      |             |             |      |      |      |      ||      |      |   #   Parent Loop BB5_6 Depth=1
 | 
				
			||||||
 | 
					2289 |             |      |             |             |      |      |      |      ||      |      |   # =>  This Inner Loop Header: Depth=2
 | 
				
			||||||
 | 
					2290 |             |      | 0.50   0.50 | 0.50   0.50 |      |      |      |      ||  4.0 |      |   movslq (%r12,%r14,4), %rbx
 | 
				
			||||||
 | 
					2291 |             | 1.00 |             |             |      | 0.00 |      |      ||  1.0 |      |   leaq (%rbx,%rbx,2), %rcx
 | 
				
			||||||
 | 
					2292 | 0.00        |      |             |             |      |      | 1.00 |      ||  1.0 |      |   shlq $6, %rcx
 | 
				
			||||||
 | 
					2293 |             |      | 0.50   0.50 | 0.50   0.50 |      |      |      |      ||      |      |   vmovapd (%rsi,%rcx), %zmm29
 | 
				
			||||||
 | 
					2294 |             |      | 0.50   0.50 | 0.50   0.50 |      |      |      |      ||      |      |   vmovapd 64(%rsi,%rcx), %zmm30
 | 
				
			||||||
 | 
					2295 |             |      | 0.50   0.50 | 0.50   0.50 |      |      |      |      ||  0.0 |      |   vmovapd 128(%rsi,%rcx), %zmm31
 | 
				
			||||||
 | 
					2296 |             |      | 0.50   0.50 | 0.50   0.50 |      |      |      |      ||      |      |   vmovupd 64(%rsp), %zmm3         # 64-byte Reload
 | 
				
			||||||
 | 
					2297 | 0.50        |      |             |             |      | 0.50 |      |      ||      |      |   vsubpd %zmm29, %zmm3, %zmm4
 | 
				
			||||||
 | 
					2298 |             |      | 0.50   0.50 | 0.50   0.50 |      |      |      |      ||      |      |   vmovupd 320(%rsp), %zmm3        # 64-byte Reload
 | 
				
			||||||
 | 
					2299 | 0.75        |      |             |             |      | 0.25 |      |      ||      |      |   vsubpd %zmm30, %zmm3, %zmm3
 | 
				
			||||||
 | 
					2300 |             | 1.00 |             |             |      | 0.00 |      |      ||      |      |   leal (%rbx,%rbx), %ecx
 | 
				
			||||||
 | 
					2301 | 0.00        | 1.00 |             |             |      | 0.00 | 0.00 |      ||      |      |   cmpq %rcx, %rdi
 | 
				
			||||||
 | 
					2302 | 0.00        |      |             |             |      |      | 1.00 |      ||      |      |   setne %dl
 | 
				
			||||||
 | 
					2303 | 0.00        |      |             |             |      |      | 1.00 |      ||      |      |   sete %cl
 | 
				
			||||||
 | 
					2304 |             | 1.00 |             |             |      |      |      |      ||      |      |   leal 1(%rbx,%rbx), %ebx
 | 
				
			||||||
 | 
					2305 | 0.50        |      |             |             |      | 0.50 |      |      ||  4.0 |      |   vsubpd %zmm31, %zmm25, %zmm17
 | 
				
			||||||
 | 
					2306 | 0.50        |      |             |             |      | 0.50 |      |      ||  4.0 |      |   vmulpd %zmm17, %zmm17, %zmm18
 | 
				
			||||||
 | 
					2307 | 0.50        |      |             |             |      | 0.50 |      |      ||  4.0 |      |   vfmadd231pd %zmm3, %zmm3, %zmm18 # zmm18 = (zmm3 * zmm3) + zmm18
 | 
				
			||||||
 | 
					2308 | 0.50        |      |             |             |      | 0.50 |      |      ||  4.0 |      |   vfmadd231pd %zmm4, %zmm4, %zmm18 # zmm18 = (zmm4 * zmm4) + zmm18
 | 
				
			||||||
 | 
					2309 | 2.75        |      |             |             |      | 0.25 |      |      ||  8.0 |      |   vrcp14pd %zmm18, %zmm19
 | 
				
			||||||
 | 
					2310 | 0.00        | 0.75 |             |             |      | 0.00 | 0.25 |      ||      |      |   cmpq %rbx, %rdi
 | 
				
			||||||
 | 
					2311 | 0.00        |      |             |             |      |      | 1.00 |      ||      |      |   sete %bl
 | 
				
			||||||
 | 
					2312 | 0.00        | 0.75 |             |             |      | 0.00 | 0.25 |      ||      |      |   movl %ebx, %ebp
 | 
				
			||||||
 | 
					2313 | 0.50        |      |             |             |      | 0.50 |      |      ||  4.0 |      |   vmulpd %zmm22, %zmm19, %zmm20
 | 
				
			||||||
 | 
					2314 | 0.50        |      |             |             |      | 0.50 |      |      ||      |      |   vmulpd %zmm19, %zmm19, %zmm21
 | 
				
			||||||
 | 
					2315 | 0.50        |      |             |             |      | 0.50 |      |      ||  4.0 |      |   vmulpd %zmm20, %zmm21, %zmm20
 | 
				
			||||||
 | 
					2316 |             |      | 0.50   0.50 | 0.50   0.50 |      |      |      |      ||      |      |   vmovupd 128(%rsp), %zmm21       # 64-byte Reload
 | 
				
			||||||
 | 
					2317 | 0.75        |      |             |             |      | 0.25 |      |      ||      |      |   vsubpd %zmm29, %zmm21, %zmm21
 | 
				
			||||||
 | 
					2318 | 0.00        |      |             |             |      |      | 1.00 |      ||      |      |   shlb $4, %bpl
 | 
				
			||||||
 | 
					2319 | 0.50        |      |             |             |      | 0.50 |      |      ||      |      |   vmulpd %zmm19, %zmm1, %zmm19
 | 
				
			||||||
 | 
					2320 | 0.50        |      |             |             |      | 0.50 |      |      ||  4.0 |      |   vmulpd %zmm20, %zmm19, %zmm19
 | 
				
			||||||
 | 
					2321 | 0.50        |      |             |             |      | 0.50 |      |      ||      |      |   vaddpd %zmm2, %zmm20, %zmm20
 | 
				
			||||||
 | 
					2322 | 0.50        |      |             |             |      | 0.50 |      |      ||  4.0 |      |   vmulpd %zmm20, %zmm19, %zmm19
 | 
				
			||||||
 | 
					2323 |             |      | 0.50   0.50 | 0.50   0.50 |      |      |      |      ||      |      |   vmovupd 256(%rsp), %zmm20       # 64-byte Reload
 | 
				
			||||||
 | 
					2324 | 0.50        |      |             |             |      | 0.50 |      |      ||      |      |   vsubpd %zmm30, %zmm20, %zmm20
 | 
				
			||||||
 | 
					2325 | 0.00        | 0.50 |             |             |      | 0.00 | 0.50 |      ||      |      |   notb %bpl
 | 
				
			||||||
 | 
					2326 | 0.00        | 0.25 |             |             |      | 0.00 | 0.75 |      ||      |      |   subb %cl, %bpl
 | 
				
			||||||
 | 
					2327 | 1.00        |      |             |             |      |      |      |      ||      |      |   kmovd %ebp, %k1
 | 
				
			||||||
 | 
					2328 |             |      |             |             |      |      |      |      ||      |      | X vcmpltpd %zmm0, %zmm18, %k1 {%k1}
 | 
				
			||||||
 | 
					2329 | 0.50        |      |             |             |      | 0.50 |      |      ||      |      |   vsubpd %zmm31, %zmm26, %zmm18
 | 
				
			||||||
 | 
					2330 | 0.50        |      |             |             |      | 0.50 |      |      ||  4.0 |      |   vfmadd231pd %zmm4, %zmm19, %zmm15 {%k1} # zmm15 = (zmm19 * zmm4) + zmm15
 | 
				
			||||||
 | 
					2331 | 0.50        |      |             |             |      | 0.50 |      |      ||      |      |   vmulpd %zmm18, %zmm18, %zmm4
 | 
				
			||||||
 | 
					2332 | 0.50        |      |             |             |      | 0.50 |      |      ||      |      |   vfmadd231pd %zmm20, %zmm20, %zmm4 # zmm4 = (zmm20 * zmm20) + zmm4
 | 
				
			||||||
 | 
					2333 | 0.50        |      |             |             |      | 0.50 |      |      ||      |      |   vfmadd231pd %zmm21, %zmm21, %zmm4 # zmm4 = (zmm21 * zmm21) + zmm4
 | 
				
			||||||
 | 
					2334 | 0.50        |      |             |             |      | 0.50 |      |      ||      |      |   vfmadd231pd %zmm3, %zmm19, %zmm12 {%k1} # zmm12 = (zmm19 * zmm3) + zmm12
 | 
				
			||||||
 | 
					2335 | 2.25        |      |             |             |      | 0.75 |      |      ||      |      |   vrcp14pd %zmm4, %zmm3
 | 
				
			||||||
 | 
					2336 |             | 1.00 |             |             |      | 0.00 |      |      ||      |      |   leal (%rdx,%rdx), %ecx
 | 
				
			||||||
 | 
					2337 | 0.00        | 0.75 |             |             |      | 0.00 | 0.25 |      ||      |      |   movl %ebx, %eax
 | 
				
			||||||
 | 
					2338 | 0.50        |      |             |             |      | 0.50 |      |      ||      |      |   vfmadd231pd %zmm17, %zmm19, %zmm8 {%k1} # zmm8 = (zmm19 * zmm17) + zmm8
 | 
				
			||||||
 | 
					2339 | 0.50        |      |             |             |      | 0.50 |      |      ||      |      |   vmulpd %zmm22, %zmm3, %zmm17
 | 
				
			||||||
 | 
					2340 | 0.50        |      |             |             |      | 0.50 |      |      ||      |      |   vmulpd %zmm3, %zmm3, %zmm19
 | 
				
			||||||
 | 
					2341 | 0.50        |      |             |             |      | 0.50 |      |      ||      |      |   vmulpd %zmm17, %zmm19, %zmm17
 | 
				
			||||||
 | 
					2342 |             |      | 0.50   0.50 | 0.50   0.50 |      |      |      |      ||      |      |   vmovupd 448(%rsp), %zmm19       # 64-byte Reload
 | 
				
			||||||
 | 
					2343 | 0.75        |      |             |             |      | 0.25 |      |      ||      |      |   vsubpd %zmm29, %zmm19, %zmm19
 | 
				
			||||||
 | 
					2344 | 0.00        |      |             |             |      |      | 1.00 |      ||      |      |   shlb $5, %al
 | 
				
			||||||
 | 
					2345 | 0.50        |      |             |             |      | 0.50 |      |      ||      |      |   vmulpd %zmm3, %zmm1, %zmm3
 | 
				
			||||||
 | 
					2346 | 0.50        |      |             |             |      | 0.50 |      |      ||      |      |   vmulpd %zmm17, %zmm3, %zmm3
 | 
				
			||||||
 | 
					2347 | 0.50        |      |             |             |      | 0.50 |      |      ||      |      |   vaddpd %zmm2, %zmm17, %zmm17
 | 
				
			||||||
 | 
					2348 | 0.50        |      |             |             |      | 0.50 |      |      ||      |      |   vmulpd %zmm17, %zmm3, %zmm3
 | 
				
			||||||
 | 
					2349 | 0.50        |      |             |             |      | 0.50 |      |      ||      |      |   vsubpd %zmm30, %zmm23, %zmm17
 | 
				
			||||||
 | 
					2350 | 0.00        | 0.50 |             |             |      | 0.00 | 0.50 |      ||      |      |   subb %al, %cl
 | 
				
			||||||
 | 
					2351 | 0.00        | 0.25 |             |             |      | 0.00 | 0.75 |      ||      |      |   addb $-3, %cl
 | 
				
			||||||
 | 
					2352 | 1.00        |      |             |             |      |      |      |      ||      |      |   kmovd %ecx, %k1
 | 
				
			||||||
 | 
					2353 |             |      |             |             |      |      |      |      ||      |      | X vcmpltpd %zmm0, %zmm4, %k1 {%k1}
 | 
				
			||||||
 | 
					2354 | 0.50        |      |             |             |      | 0.50 |      |      ||      |      |   vsubpd %zmm31, %zmm27, %zmm4
 | 
				
			||||||
 | 
					2355 | 0.50        |      |             |             |      | 0.50 |      |      ||      |      |   vfmadd231pd %zmm21, %zmm3, %zmm14 {%k1} # zmm14 = (zmm3 * zmm21) + zmm14
 | 
				
			||||||
 | 
					2356 | 0.50        |      |             |             |      | 0.50 |      |      ||      |      |   vmulpd %zmm4, %zmm4, %zmm21
 | 
				
			||||||
 | 
					2357 | 0.50        |      |             |             |      | 0.50 |      |      ||      |      |   vfmadd231pd %zmm17, %zmm17, %zmm21 # zmm21 = (zmm17 * zmm17) + zmm21
 | 
				
			||||||
 | 
					2358 | 0.50        |      |             |             |      | 0.50 |      |      ||      |      |   vfmadd231pd %zmm19, %zmm19, %zmm21 # zmm21 = (zmm19 * zmm19) + zmm21
 | 
				
			||||||
 | 
					2359 | 0.50        |      |             |             |      | 0.50 |      |      ||      |      |   vfmadd231pd %zmm20, %zmm3, %zmm10 {%k1} # zmm10 = (zmm3 * zmm20) + zmm10
 | 
				
			||||||
 | 
					2360 | 2.50        |      |             |             |      | 0.50 |      |      ||      |      |   vrcp14pd %zmm21, %zmm20
 | 
				
			||||||
 | 
					2361 | 0.50        |      |             |             |      | 0.50 |      |      ||      |      |   vfmadd231pd %zmm18, %zmm3, %zmm6 {%k1} # zmm6 = (zmm3 * zmm18) + zmm6
 | 
				
			||||||
 | 
					2362 | 0.50        |      |             |             |      | 0.50 |      |      ||      |      |   vmulpd %zmm22, %zmm20, %zmm3
 | 
				
			||||||
 | 
					2363 | 0.50        |      |             |             |      | 0.50 |      |      ||      |      |   vmulpd %zmm20, %zmm20, %zmm18
 | 
				
			||||||
 | 
					2364 | 0.50        |      |             |             |      | 0.50 |      |      ||      |      |   vmulpd %zmm3, %zmm18, %zmm3
 | 
				
			||||||
 | 
					2365 | 0.50        |      |             |             |      | 0.50 |      |      ||      |      |   vmulpd %zmm20, %zmm1, %zmm18
 | 
				
			||||||
 | 
					2366 | 0.50        |      |             |             |      | 0.50 |      |      ||      |      |   vmulpd %zmm3, %zmm18, %zmm18
 | 
				
			||||||
 | 
					2367 | 0.50        |      |             |             |      | 0.50 |      |      ||      |      |   vaddpd %zmm2, %zmm3, %zmm3
 | 
				
			||||||
 | 
					2368 | 0.50        |      |             |             |      | 0.50 |      |      ||      |      |   vmulpd %zmm3, %zmm18, %zmm3
 | 
				
			||||||
 | 
					2369 |             | 1.00 |             |             |      | 0.00 |      |      ||      |      |   leal (,%rdx,4), %eax
 | 
				
			||||||
 | 
					2370 | 0.00        | 0.75 |             |             |      | 0.00 | 0.25 |      ||      |      |   movl %ebx, %ecx
 | 
				
			||||||
 | 
					2371 | 0.00        |      |             |             |      |      | 1.00 |      ||      |      |   shlb $6, %cl
 | 
				
			||||||
 | 
					2372 | 0.00        | 0.50 |             |             |      | 0.00 | 0.50 |      ||      |      |   subb %cl, %al
 | 
				
			||||||
 | 
					2373 | 0.00        | 1.00 |             |             |      | 0.00 | 0.00 |      ||      |      |   addb $-5, %al
 | 
				
			||||||
 | 
					2374 | 1.00        |      |             |             |      |      |      |      ||      |      |   kmovd %eax, %k1
 | 
				
			||||||
 | 
					2375 |             |      |             |             |      |      |      |      ||      |      | X vcmpltpd %zmm0, %zmm21, %k1 {%k1}
 | 
				
			||||||
 | 
					2376 |             |      | 0.50   0.50 | 0.50   0.50 |      |      |      |      ||      |      |   vmovupd 384(%rsp), %zmm18       # 64-byte Reload
 | 
				
			||||||
 | 
					2377 | 0.50        |      |             |             |      | 0.50 |      |      ||      |      |   vsubpd %zmm29, %zmm18, %zmm18
 | 
				
			||||||
 | 
					2378 | 0.50        |      |             |             |      | 0.50 |      |      ||      |      |   vsubpd %zmm30, %zmm24, %zmm20
 | 
				
			||||||
 | 
					2379 | 0.50        |      |             |             |      | 0.50 |      |      ||      |      |   vsubpd %zmm31, %zmm28, %zmm21
 | 
				
			||||||
 | 
					2380 | 0.50        |      |             |             |      | 0.50 |      |      ||      |      |   vfmadd231pd %zmm19, %zmm3, %zmm16 {%k1} # zmm16 = (zmm3 * zmm19) + zmm16
 | 
				
			||||||
 | 
					2381 | 0.50        |      |             |             |      | 0.50 |      |      ||      |      |   vmulpd %zmm21, %zmm21, %zmm19
 | 
				
			||||||
 | 
					2382 | 0.50        |      |             |             |      | 0.50 |      |      ||      |      |   vfmadd231pd %zmm20, %zmm20, %zmm19 # zmm19 = (zmm20 * zmm20) + zmm19
 | 
				
			||||||
 | 
					2383 | 0.25        |      |             |             |      | 0.75 |      |      ||      |      |   vfmadd231pd %zmm18, %zmm18, %zmm19 # zmm19 = (zmm18 * zmm18) + zmm19
 | 
				
			||||||
 | 
					2384 | 0.00        |      |             |             |      | 1.00 |      |      ||      |      |   vfmadd231pd %zmm17, %zmm3, %zmm11 {%k1} # zmm11 = (zmm3 * zmm17) + zmm11
 | 
				
			||||||
 | 
					2385 | 2.00        |      |             |             |      | 1.00 |      |      ||      |      |   vrcp14pd %zmm19, %zmm17
 | 
				
			||||||
 | 
					2386 | 0.00        |      |             |             |      | 1.00 |      |      ||      |      |   vfmadd231pd %zmm4, %zmm3, %zmm7 {%k1} # zmm7 = (zmm3 * zmm4) + zmm7
 | 
				
			||||||
 | 
					2387 | 0.00        |      |             |             |      | 1.00 |      |      ||      |      |   vmulpd %zmm22, %zmm17, %zmm3
 | 
				
			||||||
 | 
					2388 | 0.00        |      |             |             |      | 1.00 |      |      ||      |      |   vmulpd %zmm17, %zmm17, %zmm4
 | 
				
			||||||
 | 
					2389 | 0.00        |      |             |             |      | 1.00 |      |      ||      |      |   vmulpd %zmm3, %zmm4, %zmm3
 | 
				
			||||||
 | 
					2390 | 0.00        |      |             |             |      | 1.00 |      |      ||      |      |   vmulpd %zmm17, %zmm1, %zmm4
 | 
				
			||||||
 | 
					2391 | 0.00        |      |             |             |      | 1.00 |      |      ||      |      |   vmulpd %zmm3, %zmm4, %zmm4
 | 
				
			||||||
 | 
					2392 | 0.00        |      |             |             |      | 1.00 |      |      ||      |      |   vaddpd %zmm2, %zmm3, %zmm3
 | 
				
			||||||
 | 
					2393 | 0.00        |      |             |             |      | 1.00 |      |      ||      |      |   vmulpd %zmm3, %zmm4, %zmm3
 | 
				
			||||||
 | 
					2394 | 0.00        |      |             |             |      |      | 1.00 |      ||      |      |   shlb $3, %dl
 | 
				
			||||||
 | 
					2395 | 0.00        |      |             |             |      |      | 1.00 |      ||      |      |   shlb $7, %bl
 | 
				
			||||||
 | 
					2396 | 0.00        | 0.50 |             |             |      | 0.00 | 0.50 |      ||      |      |   subb %bl, %dl
 | 
				
			||||||
 | 
					2397 | 0.00        | 0.50 |             |             |      | 0.00 | 0.50 |      ||      |      |   addb $-9, %dl
 | 
				
			||||||
 | 
					2398 | 1.00        |      |             |             |      |      |      |      ||      |      |   kmovd %edx, %k1
 | 
				
			||||||
 | 
					2399 |             |      |             |             |      |      |      |      ||      |      | X vcmpltpd %zmm0, %zmm19, %k1 {%k1}
 | 
				
			||||||
 | 
					2400 | 0.00        |      |             |             |      | 1.00 |      |      ||      |      |   vfmadd231pd %zmm18, %zmm3, %zmm13 {%k1} # zmm13 = (zmm3 * zmm18) + zmm13
 | 
				
			||||||
 | 
					2401 | 0.00        |      |             |             |      | 1.00 |      |      ||      |      |   vfmadd231pd %zmm20, %zmm3, %zmm9 {%k1} # zmm9 = (zmm3 * zmm20) + zmm9
 | 
				
			||||||
 | 
					2402 | 0.00        |      |             |             |      | 1.00 |      |      ||      |  4.0 |   vfmadd231pd %zmm21, %zmm3, %zmm5 {%k1} # zmm5 = (zmm3 * zmm21) + zmm5
 | 
				
			||||||
 | 
					2403 | 0.00        | 0.50 |             |             |      | 0.00 | 0.50 |      ||      |      |   incq %r14
 | 
				
			||||||
 | 
					2404 | 0.00        | 1.00 |             |             |      | 0.00 | 0.00 |      ||      |      |   cmpq %r14, %r11
 | 
				
			||||||
 | 
					2405 |             |      |             |             |      |      |      |      ||      |      | * jne .LBB5_11
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					       40.0          14.5   5.00   5.00   5.00   5.00          40.0   14.5           50.0    4.0  
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Loop-Carried Dependencies Analysis Report
 | 
				
			||||||
 | 
					-----------------------------------------
 | 
				
			||||||
 | 
					2402 |  4.0 | vfmadd231pd	%zmm21, %zmm3, %zmm5 {%k1} # zmm5 = (zmm3 * zmm21) + zmm5| [2402]
 | 
				
			||||||
 | 
					2401 |  4.0 | vfmadd231pd	%zmm20, %zmm3, %zmm9 {%k1} # zmm9 = (zmm3 * zmm20) + zmm9| [2401]
 | 
				
			||||||
 | 
					2400 |  4.0 | vfmadd231pd	%zmm18, %zmm3, %zmm13 {%k1} # zmm13 = (zmm3 * zmm18) + zmm13| [2400]
 | 
				
			||||||
 | 
					2386 |  4.0 | vfmadd231pd	%zmm4, %zmm3, %zmm7 {%k1} # zmm7 = (zmm3 * zmm4) + zmm7| [2386]
 | 
				
			||||||
 | 
					2384 |  4.0 | vfmadd231pd	%zmm17, %zmm3, %zmm11 {%k1} # zmm11 = (zmm3 * zmm17) + zmm11| [2384]
 | 
				
			||||||
 | 
					2380 |  4.0 | vfmadd231pd	%zmm19, %zmm3, %zmm16 {%k1} # zmm16 = (zmm3 * zmm19) + zmm16| [2380]
 | 
				
			||||||
 | 
					2361 |  4.0 | vfmadd231pd	%zmm18, %zmm3, %zmm6 {%k1} # zmm6 = (zmm3 * zmm18) + zmm6| [2361]
 | 
				
			||||||
 | 
					2359 |  4.0 | vfmadd231pd	%zmm20, %zmm3, %zmm10 {%k1} # zmm10 = (zmm3 * zmm20) + zmm10| [2359]
 | 
				
			||||||
 | 
					2355 |  4.0 | vfmadd231pd	%zmm21, %zmm3, %zmm14 {%k1} # zmm14 = (zmm3 * zmm21) + zmm14| [2355]
 | 
				
			||||||
 | 
					2338 |  4.0 | vfmadd231pd	%zmm17, %zmm19, %zmm8 {%k1} # zmm8 = (zmm19 * zmm17) + zmm8| [2338]
 | 
				
			||||||
 | 
					2334 |  4.0 | vfmadd231pd	%zmm3, %zmm19, %zmm12 {%k1} # zmm12 = (zmm19 * zmm3) + zmm12| [2334]
 | 
				
			||||||
 | 
					2330 |  4.0 | vfmadd231pd	%zmm4, %zmm19, %zmm15 {%k1} # zmm15 = (zmm19 * zmm4) + zmm15| [2330]
 | 
				
			||||||
 | 
					2394 |  3.0 | shlb	$3, %dl                        | [2394, 2396, 2397]
 | 
				
			||||||
 | 
					2318 |  3.0 | shlb	$4, %bpl                       | [2318, 2325, 2326]
 | 
				
			||||||
 | 
					2403 |  1.0 | incq	%r14                           | [2403]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		Reference in New Issue
	
	Block a user