fixed wrong markers and added OSACA output for ICX
This commit is contained in:
		
							
								
								
									
										167
									
								
								static_analysis/jan/analyses/gromacs-icx-avx512-dp-osaca-icx.out
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										167
									
								
								static_analysis/jan/analyses/gromacs-icx-avx512-dp-osaca-icx.out
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,167 @@
 | 
				
			|||||||
 | 
					Open Source Architecture Code Analyzer (OSACA) - 0.4.12
 | 
				
			||||||
 | 
					Analyzed file:      gromacs-icx-avx512-dp.s
 | 
				
			||||||
 | 
					Architecture:       ICX
 | 
				
			||||||
 | 
					Timestamp:          2023-02-14 12:51:57
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					 P - Throughput of LOAD operation can be hidden behind a past or future STORE instruction
 | 
				
			||||||
 | 
					 * - Instruction micro-ops not bound to a port
 | 
				
			||||||
 | 
					 X - No throughput/latency information for this instruction in data file
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Combined Analysis Report
 | 
				
			||||||
 | 
					------------------------
 | 
				
			||||||
 | 
					                                                 Port pressure in cycles                                                  
 | 
				
			||||||
 | 
					     |  0   - 0DV  |  1   - 1DV  |  2   -  2D  |  3   -  3D  |  4   |   5   |  6   |  7   |  8   |  9   ||  CP  | LCD  |
 | 
				
			||||||
 | 
					------------------------------------------------------------------------------------------------------------------------
 | 
				
			||||||
 | 
					2241 |             |             |             |             |      |       |      |      |      |      ||      |      |   # pointer_increment=64 da67166e5736661e6b03ea29ee7bfd67
 | 
				
			||||||
 | 
					2242 |             |             |             |             |      |       |      |      |      |      ||      |      |   # LLVM-MCA-BEGIN
 | 
				
			||||||
 | 
					2243 |             |             |             |             |      |       |      |      |      |      ||      |      |   .LBB5_12:                               #   Parent Loop BB5_7 Depth=1
 | 
				
			||||||
 | 
					2244 |             |             |             |             |      |       |      |      |      |      ||      |      |   # =>  This Inner Loop Header: Depth=2
 | 
				
			||||||
 | 
					2245 |             |             | 0.50   0.50 | 0.50   0.50 |      |       |      |      |      |      ||  5.0 |      |   movslq (%r10,%rbx,4), %rcx
 | 
				
			||||||
 | 
					2246 | 0.00        | 0.75        |             |             |      | 0.000 | 0.25 |      |      |      ||  6.0 |      |   leaq (%rcx,%rcx,2), %rdx
 | 
				
			||||||
 | 
					2247 | 0.00        |             |             |             |      |       | 1.00 |      |      |      ||  1.0 |      |   shlq $6, %rdx
 | 
				
			||||||
 | 
					2248 |             |             | 0.50   0.50 | 0.50   0.50 |      |       |      |      |      |      ||      |      |   vmovupd (%rsi,%rdx), %zmm28             # AlignMOV convert to UnAlignMOV
 | 
				
			||||||
 | 
					2249 |             |             | 0.50   0.50 | 0.50   0.50 |      |       |      |      |      |      ||      |      |   vmovupd 64(%rsi,%rdx), %zmm29           # AlignMOV convert to UnAlignMOV
 | 
				
			||||||
 | 
					2250 |             |             | 0.50   0.50 | 0.50   0.50 |      |       |      |      |      |      ||  5.0 |      |   vmovupd 128(%rsi,%rdx), %zmm30          # AlignMOV convert to UnAlignMOV
 | 
				
			||||||
 | 
					2251 |             |             | 0.50   0.50 | 0.50   0.50 |      |       |      |      |      |      ||      |      |   vmovupd 16(%rsp), %zmm3                 # 64-byte Reload
 | 
				
			||||||
 | 
					2252 | 0.50        |             |             |             |      | 0.500 |      |      |      |      ||      |      |   vsubpd %zmm28, %zmm3, %zmm3
 | 
				
			||||||
 | 
					2253 | 0.50        |             |             |             |      | 0.500 |      |      |      |      ||  4.0 |      |   vsubpd %zmm30, %zmm24, %zmm31
 | 
				
			||||||
 | 
					2254 |             |             | 0.50   0.50 | 0.50   0.50 |      |       |      |      |      |      ||      |      |   vmovupd 336(%rsp), %zmm16               # 64-byte Reload
 | 
				
			||||||
 | 
					2255 | 0.50        |             |             |             |      | 0.500 |      |      |      |      ||      |      |   vsubpd %zmm29, %zmm16, %zmm16
 | 
				
			||||||
 | 
					2256 | 0.50        |             |             |             |      | 0.500 |      |      |      |      ||  4.0 |      |   vmulpd %zmm31, %zmm31, %zmm17
 | 
				
			||||||
 | 
					2257 | 0.50        |             |             |             |      | 0.500 |      |      |      |      ||  4.0 |      |   vfmadd231pd %zmm16, %zmm16, %zmm17  # zmm17 = (zmm16 * zmm16) + zmm17
 | 
				
			||||||
 | 
					2258 | 0.50        |             |             |             |      | 0.500 |      |      |      |      ||  4.0 |      |   vfmadd231pd %zmm3, %zmm3, %zmm17    # zmm17 = (zmm3 * zmm3) + zmm17
 | 
				
			||||||
 | 
					2259 | 2.50        |             |             |             |      | 0.500 |      |      |      |      ||  6.0 |      |   vrcp14pd %zmm17, %zmm18
 | 
				
			||||||
 | 
					2260 | 0.50        |             |             |             |      | 0.500 |      |      |      |      ||  4.0 |      |   vmulpd %zmm18, %zmm21, %zmm19
 | 
				
			||||||
 | 
					2261 | 0.50        |             |             |             |      | 0.500 |      |      |      |      ||  4.0 |      |   vmulpd %zmm19, %zmm18, %zmm19
 | 
				
			||||||
 | 
					2262 | 0.50        |             |             |             |      | 0.500 |      |      |      |      ||  4.0 |      |   vmulpd %zmm19, %zmm18, %zmm19
 | 
				
			||||||
 | 
					2263 | 0.50        |             |             |             |      | 0.500 |      |      |      |      ||  4.0 |      |   vaddpd %zmm1, %zmm19, %zmm20
 | 
				
			||||||
 | 
					2264 | 0.50        |             |             |             |      | 0.500 |      |      |      |      ||      |      |   vmulpd %zmm18, %zmm22, %zmm18
 | 
				
			||||||
 | 
					2265 | 0.75        |             |             |             |      | 0.250 |      |      |      |      ||  4.0 |      |   vmulpd %zmm20, %zmm18, %zmm18
 | 
				
			||||||
 | 
					2266 | 1.00        |             |             |             |      | 0.000 |      |      |      |      ||      |      |   vsubpd %zmm30, %zmm25, %zmm20
 | 
				
			||||||
 | 
					2267 | 0.00        | 0.50        |             |             |      | 0.000 | 0.50 |      |      |      ||      |      |   leal (%rcx,%rcx), %edx
 | 
				
			||||||
 | 
					2268 | 0.00        | 1.00        |             |             |      | 0.000 | 0.00 |      |      |      ||      |      |   cmpq %rdx, %r11
 | 
				
			||||||
 | 
					2269 | 0.00        |             |             |             |      |       | 1.00 |      |      |      ||      |      |   setne %dl
 | 
				
			||||||
 | 
					2270 | 0.00        |             |             |             |      |       | 1.00 |      |      |      ||      |      |   sete %al
 | 
				
			||||||
 | 
					2271 | 0.00        | 0.50        |             |             |      | 0.000 | 0.50 |      |      |      ||      |      |   addl %ecx, %ecx
 | 
				
			||||||
 | 
					2272 | 0.00        | 0.50        |             |             |      | 0.000 | 0.50 |      |      |      ||      |      |   incl %ecx
 | 
				
			||||||
 | 
					2273 | 0.00        | 1.00        |             |             |      | 0.000 | 0.00 |      |      |      ||      |      |   cmpq %rcx, %r11
 | 
				
			||||||
 | 
					2274 | 0.00        |             |             |             |      |       | 1.00 |      |      |      ||      |      |   sete %cl
 | 
				
			||||||
 | 
					2275 | 0.50        |             |             |             |      | 0.500 |      |      |      |      ||  4.0 |      |   vmulpd %zmm18, %zmm19, %zmm18
 | 
				
			||||||
 | 
					2276 |             |             | 0.50   0.50 | 0.50   0.50 |      |       |      |      |      |      ||      |      |   vmovupd 528(%rsp), %zmm19               # 64-byte Reload
 | 
				
			||||||
 | 
					2277 | 1.00        |             |             |             |      | 0.000 |      |      |      |      ||      |      |   vsubpd %zmm28, %zmm19, %zmm19
 | 
				
			||||||
 | 
					2278 | 0.00        |             |             |             |      |       | 1.00 |      |      |      ||      |      |   setne %dil
 | 
				
			||||||
 | 
					2279 | 0.00        | 0.75        |             |             |      | 0.000 | 0.25 |      |      |      ||      |      |   movl %edi, %ebp
 | 
				
			||||||
 | 
					2280 | 0.00        |             |             |             |      |       | 1.00 |      |      |      ||      |  1.0 |   shlb $4, %bpl
 | 
				
			||||||
 | 
					2281 | 0.00        | 0.50        |             |             |      | 0.000 | 0.50 |      |      |      ||      |  1.0 |   subb %al, %bpl
 | 
				
			||||||
 | 
					2282 | 0.00        | 0.50        |             |             |      | 0.000 | 0.50 |      |      |      ||      |  1.0 |   addb $-17, %bpl
 | 
				
			||||||
 | 
					2283 | 1.00        |             |             |             |      |       |      |      |      |      ||      |      |   kmovd %ebp, %k1
 | 
				
			||||||
 | 
					2284 | 0.50        |             |             |             |      | 0.500 |      |      |      |      ||      |      |   vcmpltpd %zmm0, %zmm17, %k1 {%k1}
 | 
				
			||||||
 | 
					2285 |             |             | 0.50   0.50 | 0.50   0.50 |      |       |      |      |      |      ||      |      |   vmovupd 272(%rsp), %zmm17               # 64-byte Reload
 | 
				
			||||||
 | 
					2286 | 0.50        |             |             |             |      | 0.500 |      |      |      |      ||      |      |   vsubpd %zmm29, %zmm17, %zmm17
 | 
				
			||||||
 | 
					2287 | 0.00        | 0.50        |             |             |      | 0.000 | 0.50 |      |      |      ||      |      |   leal (%rdx,%rdx), %eax
 | 
				
			||||||
 | 
					2288 | 0.00        | 0.75        |             |             |      | 0.000 | 0.25 |      |      |      ||      |      |   movl %edi, %ebp
 | 
				
			||||||
 | 
					2289 | 0.50        |             |             |             |      | 0.500 |      |      |      |      ||  4.0 |      |   vmulpd %zmm2, %zmm18, %zmm18
 | 
				
			||||||
 | 
					2290 | 0.50        |             |             |             |      | 0.500 |      |      |      |      ||  4.0 |      |   vfmadd231pd %zmm18, %zmm3, %zmm14 {%k1} # zmm14 {%k1} = (zmm3 * zmm18) + zmm14
 | 
				
			||||||
 | 
					2291 | 0.50        |             |             |             |      | 0.500 |      |      |      |      ||      |      |   vmulpd %zmm20, %zmm20, %zmm3
 | 
				
			||||||
 | 
					2292 | 0.50        |             |             |             |      | 0.500 |      |      |      |      ||      |      |   vfmadd231pd %zmm17, %zmm17, %zmm3   # zmm3 = (zmm17 * zmm17) + zmm3
 | 
				
			||||||
 | 
					2293 | 0.50        |             |             |             |      | 0.500 |      |      |      |      ||      |      |   vfmadd231pd %zmm19, %zmm19, %zmm3   # zmm3 = (zmm19 * zmm19) + zmm3
 | 
				
			||||||
 | 
					2294 | 0.50        |             |             |             |      | 0.500 |      |      |      |      ||      |      |   vfmadd231pd %zmm18, %zmm16, %zmm11 {%k1} # zmm11 {%k1} = (zmm16 * zmm18) + zmm11
 | 
				
			||||||
 | 
					2295 | 2.50        |             |             |             |      | 0.500 |      |      |      |      ||      |      |   vrcp14pd %zmm3, %zmm16
 | 
				
			||||||
 | 
					2296 | 0.50        |             |             |             |      | 0.500 |      |      |      |      ||      |      |   vfmadd231pd %zmm18, %zmm31, %zmm7 {%k1} # zmm7 {%k1} = (zmm31 * zmm18) + zmm7
 | 
				
			||||||
 | 
					2297 | 0.50        |             |             |             |      | 0.500 |      |      |      |      ||      |      |   vmulpd %zmm16, %zmm21, %zmm18
 | 
				
			||||||
 | 
					2298 | 0.50        |             |             |             |      | 0.500 |      |      |      |      ||      |      |   vmulpd %zmm18, %zmm16, %zmm18
 | 
				
			||||||
 | 
					2299 | 0.50        |             |             |             |      | 0.500 |      |      |      |      ||      |      |   vmulpd %zmm18, %zmm16, %zmm18
 | 
				
			||||||
 | 
					2300 | 0.50        |             |             |             |      | 0.500 |      |      |      |      ||      |      |   vaddpd %zmm1, %zmm18, %zmm31
 | 
				
			||||||
 | 
					2301 | 0.50        |             |             |             |      | 0.500 |      |      |      |      ||      |      |   vmulpd %zmm16, %zmm22, %zmm16
 | 
				
			||||||
 | 
					2302 | 0.50        |             |             |             |      | 0.500 |      |      |      |      ||      |      |   vmulpd %zmm31, %zmm16, %zmm16
 | 
				
			||||||
 | 
					2303 |             |             | 0.50   0.50 | 0.50   0.50 |      |       |      |      |      |      ||      |      |   vmovupd 464(%rsp), %zmm31               # 64-byte Reload
 | 
				
			||||||
 | 
					2304 | 0.75        |             |             |             |      | 0.250 |      |      |      |      ||      |      |   vsubpd %zmm28, %zmm31, %zmm31
 | 
				
			||||||
 | 
					2305 | 0.00        |             |             |             |      |       | 1.00 |      |      |      ||      |  1.0 |   shlb $5, %bpl
 | 
				
			||||||
 | 
					2306 | 0.00        | 0.50        |             |             |      | 0.000 | 0.50 |      |      |      ||      |  1.0 |   orb %al, %bpl
 | 
				
			||||||
 | 
					2307 | 0.00        | 0.50        |             |             |      | 0.000 | 0.50 |      |      |      ||      |  1.0 |   orb $-35, %bpl
 | 
				
			||||||
 | 
					2308 | 1.00        |             |             |             |      |       |      |      |      |      ||      |      |   kmovd %ebp, %k1
 | 
				
			||||||
 | 
					2309 | 0.50        |             |             |             |      | 0.500 |      |      |      |      ||      |      |   vcmpltpd %zmm0, %zmm3, %k1 {%k1}
 | 
				
			||||||
 | 
					2310 |             |             | 0.50   0.50 | 0.50   0.50 |      |       |      |      |      |      ||      |      |   vmovupd 208(%rsp), %zmm3                # 64-byte Reload
 | 
				
			||||||
 | 
					2311 | 0.50        |             |             |             |      | 0.500 |      |      |      |      ||      |      |   vsubpd %zmm29, %zmm3, %zmm3
 | 
				
			||||||
 | 
					2312 | 0.50        |             |             |             |      | 0.500 |      |      |      |      ||      |      |   vmulpd %zmm16, %zmm18, %zmm16
 | 
				
			||||||
 | 
					2313 | 0.50        |             |             |             |      | 0.500 |      |      |      |      ||      |      |   vsubpd %zmm30, %zmm26, %zmm18
 | 
				
			||||||
 | 
					2314 | 0.50        |             |             |             |      | 0.500 |      |      |      |      ||      |      |   vmulpd %zmm2, %zmm16, %zmm16
 | 
				
			||||||
 | 
					2315 | 0.50        |             |             |             |      | 0.500 |      |      |      |      ||      |      |   vfmadd231pd %zmm16, %zmm19, %zmm15 {%k1} # zmm15 {%k1} = (zmm19 * zmm16) + zmm15
 | 
				
			||||||
 | 
					2316 | 0.50        |             |             |             |      | 0.500 |      |      |      |      ||      |      |   vmulpd %zmm18, %zmm18, %zmm19
 | 
				
			||||||
 | 
					2317 | 0.50        |             |             |             |      | 0.500 |      |      |      |      ||      |      |   vfmadd231pd %zmm3, %zmm3, %zmm19    # zmm19 = (zmm3 * zmm3) + zmm19
 | 
				
			||||||
 | 
					2318 | 0.50        |             |             |             |      | 0.500 |      |      |      |      ||      |      |   vfmadd231pd %zmm31, %zmm31, %zmm19  # zmm19 = (zmm31 * zmm31) + zmm19
 | 
				
			||||||
 | 
					2319 | 0.50        |             |             |             |      | 0.500 |      |      |      |      ||      |      |   vfmadd231pd %zmm16, %zmm17, %zmm10 {%k1} # zmm10 {%k1} = (zmm17 * zmm16) + zmm10
 | 
				
			||||||
 | 
					2320 | 2.50        |             |             |             |      | 0.500 |      |      |      |      ||      |      |   vrcp14pd %zmm19, %zmm17
 | 
				
			||||||
 | 
					2321 | 0.50        |             |             |             |      | 0.500 |      |      |      |      ||      |      |   vfmadd231pd %zmm16, %zmm20, %zmm6 {%k1} # zmm6 {%k1} = (zmm20 * zmm16) + zmm6
 | 
				
			||||||
 | 
					2322 | 0.50        |             |             |             |      | 0.500 |      |      |      |      ||      |      |   vmulpd %zmm17, %zmm21, %zmm16
 | 
				
			||||||
 | 
					2323 | 0.50        |             |             |             |      | 0.500 |      |      |      |      ||      |      |   vmulpd %zmm16, %zmm17, %zmm16
 | 
				
			||||||
 | 
					2324 | 0.50        |             |             |             |      | 0.500 |      |      |      |      ||      |      |   vmulpd %zmm16, %zmm17, %zmm16
 | 
				
			||||||
 | 
					2325 | 0.50        |             |             |             |      | 0.500 |      |      |      |      ||      |      |   vaddpd %zmm1, %zmm16, %zmm20
 | 
				
			||||||
 | 
					2326 | 0.50        |             |             |             |      | 0.500 |      |      |      |      ||      |      |   vmulpd %zmm17, %zmm22, %zmm17
 | 
				
			||||||
 | 
					2327 | 0.50        |             |             |             |      | 0.500 |      |      |      |      ||      |      |   vmulpd %zmm20, %zmm17, %zmm17
 | 
				
			||||||
 | 
					2328 | 0.75        |             |             |             |      | 0.250 |      |      |      |      ||      |      |   vmulpd %zmm17, %zmm16, %zmm16
 | 
				
			||||||
 | 
					2329 | 0.00        | 0.75        |             |             |      | 0.000 | 0.25 |      |      |      ||      |      |   leal (,%rdx,4), %eax
 | 
				
			||||||
 | 
					2330 | 0.00        |             |             |             |      |       | 1.00 |      |      |      ||      |      |   shlb $6, %dil
 | 
				
			||||||
 | 
					2331 | 0.00        | 1.00        |             |             |      | 0.000 | 0.00 |      |      |      ||      |      |   orb %al, %dil
 | 
				
			||||||
 | 
					2332 | 0.00        | 1.00        |             |             |      | 0.000 | 0.00 |      |      |      ||      |      |   orb $-69, %dil
 | 
				
			||||||
 | 
					2333 | 1.00        |             |             |             |      |       |      |      |      |      ||      |      |   kmovd %edi, %k1
 | 
				
			||||||
 | 
					2334 | 0.50        |             |             |             |      | 0.500 |      |      |      |      ||      |      |   vcmpltpd %zmm0, %zmm19, %k1 {%k1}
 | 
				
			||||||
 | 
					2335 |             |             | 0.50   0.50 | 0.50   0.50 |      |       |      |      |      |      ||      |      |   vmovupd 400(%rsp), %zmm17               # 64-byte Reload
 | 
				
			||||||
 | 
					2336 | 0.50        |             |             |             |      | 0.500 |      |      |      |      ||      |      |   vsubpd %zmm28, %zmm17, %zmm17
 | 
				
			||||||
 | 
					2337 | 0.50        |             |             |             |      | 0.500 |      |      |      |      ||      |      |   vsubpd %zmm29, %zmm23, %zmm19
 | 
				
			||||||
 | 
					2338 | 0.50        |             |             |             |      | 0.500 |      |      |      |      ||      |      |   vsubpd %zmm30, %zmm27, %zmm20
 | 
				
			||||||
 | 
					2339 | 0.50        |             |             |             |      | 0.500 |      |      |      |      ||      |      |   vmulpd %zmm2, %zmm16, %zmm16
 | 
				
			||||||
 | 
					2340 | 0.50        |             |             |             |      | 0.500 |      |      |      |      ||      |      |   vfmadd231pd %zmm16, %zmm31, %zmm13 {%k1} # zmm13 {%k1} = (zmm31 * zmm16) + zmm13
 | 
				
			||||||
 | 
					2341 | 0.50        |             |             |             |      | 0.500 |      |      |      |      ||      |      |   vmulpd %zmm20, %zmm20, %zmm28
 | 
				
			||||||
 | 
					2342 | 0.50        |             |             |             |      | 0.500 |      |      |      |      ||      |      |   vfmadd231pd %zmm19, %zmm19, %zmm28  # zmm28 = (zmm19 * zmm19) + zmm28
 | 
				
			||||||
 | 
					2343 | 0.00        |             |             |             |      | 1.000 |      |      |      |      ||      |      |   vfmadd231pd %zmm17, %zmm17, %zmm28  # zmm28 = (zmm17 * zmm17) + zmm28
 | 
				
			||||||
 | 
					2344 | 0.00        |             |             |             |      | 1.000 |      |      |      |      ||      |      |   vfmadd231pd %zmm16, %zmm3, %zmm9 {%k1} # zmm9 {%k1} = (zmm3 * zmm16) + zmm9
 | 
				
			||||||
 | 
					2345 | 2.00        |             |             |             |      | 1.000 |      |      |      |      ||      |      |   vrcp14pd %zmm28, %zmm3
 | 
				
			||||||
 | 
					2346 | 0.00        |             |             |             |      | 1.000 |      |      |      |      ||      |      |   vfmadd231pd %zmm16, %zmm18, %zmm5 {%k1} # zmm5 {%k1} = (zmm18 * zmm16) + zmm5
 | 
				
			||||||
 | 
					2347 | 0.00        |             |             |             |      | 1.000 |      |      |      |      ||      |      |   vmulpd %zmm3, %zmm21, %zmm16
 | 
				
			||||||
 | 
					2348 | 0.00        |             |             |             |      | 1.000 |      |      |      |      ||      |      |   vmulpd %zmm16, %zmm3, %zmm16
 | 
				
			||||||
 | 
					2349 | 0.00        |             |             |             |      | 1.000 |      |      |      |      ||      |      |   vmulpd %zmm16, %zmm3, %zmm16
 | 
				
			||||||
 | 
					2350 | 0.00        |             |             |             |      | 1.000 |      |      |      |      ||      |      |   vaddpd %zmm1, %zmm16, %zmm18
 | 
				
			||||||
 | 
					2351 | 0.00        |             |             |             |      | 1.000 |      |      |      |      ||      |      |   vmulpd %zmm3, %zmm22, %zmm3
 | 
				
			||||||
 | 
					2352 | 0.00        |             |             |             |      | 1.000 |      |      |      |      ||      |      |   vmulpd %zmm18, %zmm3, %zmm3
 | 
				
			||||||
 | 
					2353 | 0.00        |             |             |             |      | 1.000 |      |      |      |      ||      |      |   vmulpd %zmm3, %zmm16, %zmm3
 | 
				
			||||||
 | 
					2354 | 0.00        |             |             |             |      |       | 1.00 |      |      |      ||      |      |   shlb $3, %dl
 | 
				
			||||||
 | 
					2355 | 0.00        |             |             |             |      |       | 1.00 |      |      |      ||      |      |   shlb $7, %cl
 | 
				
			||||||
 | 
					2356 | 0.00        | 1.00        |             |             |      | 0.000 | 0.00 |      |      |      ||      |      |   orb %dl, %cl
 | 
				
			||||||
 | 
					2357 | 0.00        | 1.00        |             |             |      | 0.000 | 0.00 |      |      |      ||      |      |   addb $-9, %cl
 | 
				
			||||||
 | 
					2358 | 1.00        |             |             |             |      |       |      |      |      |      ||      |      |   kmovd %ecx, %k1
 | 
				
			||||||
 | 
					2359 | 0.00        |             |             |             |      | 1.000 |      |      |      |      ||      |      |   vcmpltpd %zmm0, %zmm28, %k1 {%k1}
 | 
				
			||||||
 | 
					2360 | 0.00        |             |             |             |      | 1.000 |      |      |      |      ||      |      |   vmulpd %zmm2, %zmm3, %zmm3
 | 
				
			||||||
 | 
					2361 | 0.00        |             |             |             |      | 1.000 |      |      |      |      ||      |      |   vfmadd231pd %zmm3, %zmm17, %zmm12 {%k1} # zmm12 {%k1} = (zmm17 * zmm3) + zmm12
 | 
				
			||||||
 | 
					2362 | 0.00        |             |             |             |      | 1.000 |      |      |      |      ||      |      |   vfmadd231pd %zmm3, %zmm19, %zmm8 {%k1} # zmm8 {%k1} = (zmm19 * zmm3) + zmm8
 | 
				
			||||||
 | 
					2363 | 0.24        |             |             |             |      | 0.760 |      |      |      |      ||      |      |   vfmadd231pd %zmm3, %zmm20, %zmm4 {%k1} # zmm4 {%k1} = (zmm20 * zmm3) + zmm4
 | 
				
			||||||
 | 
					2364 | 0.00        | 1.00        |             |             |      | -0.01 | 0.00 |      |      |      ||      |      |   incq %rbx
 | 
				
			||||||
 | 
					2365 | 0.00        | 1.00        |             |             |      | -0.01 | 0.00 |      |      |      ||      |      |   cmpq %rbx, %r9
 | 
				
			||||||
 | 
					2366 |             |             |             |             |      |       |      |      |      |      ||      |      | * jne .LBB5_12
 | 
				
			||||||
 | 
					2367 |             |             |             |             |      |       |      |      |      |      ||      |      |   # LLVM-MCA-END
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					       44.0          15.0          5.50   5.50   5.50   5.50          43.99   15.0                           71    6.0  
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Loop-Carried Dependencies Analysis Report
 | 
				
			||||||
 | 
					-----------------------------------------
 | 
				
			||||||
 | 
					2280 |  6.0 | shlb	$4, %bpl                       | [2280, 2281, 2282, 2305, 2306, 2307]
 | 
				
			||||||
 | 
					2363 |  4.0 | vfmadd231pd	%zmm3, %zmm20, %zmm4 {%k1} # zmm4 {%k1} = (zmm20 * zmm3) + zmm4| [2363]
 | 
				
			||||||
 | 
					2362 |  4.0 | vfmadd231pd	%zmm3, %zmm19, %zmm8 {%k1} # zmm8 {%k1} = (zmm19 * zmm3) + zmm8| [2362]
 | 
				
			||||||
 | 
					2361 |  4.0 | vfmadd231pd	%zmm3, %zmm17, %zmm12 {%k1} # zmm12 {%k1} = (zmm17 * zmm3) + zmm12| [2361]
 | 
				
			||||||
 | 
					2346 |  4.0 | vfmadd231pd	%zmm16, %zmm18, %zmm5 {%k1} # zmm5 {%k1} = (zmm18 * zmm16) + zmm5| [2346]
 | 
				
			||||||
 | 
					2344 |  4.0 | vfmadd231pd	%zmm16, %zmm3, %zmm9 {%k1} # zmm9 {%k1} = (zmm3 * zmm16) + zmm9| [2344]
 | 
				
			||||||
 | 
					2340 |  4.0 | vfmadd231pd	%zmm16, %zmm31, %zmm13 {%k1} # zmm13 {%k1} = (zmm31 * zmm16) + zmm13| [2340]
 | 
				
			||||||
 | 
					2321 |  4.0 | vfmadd231pd	%zmm16, %zmm20, %zmm6 {%k1} # zmm6 {%k1} = (zmm20 * zmm16) + zmm6| [2321]
 | 
				
			||||||
 | 
					2319 |  4.0 | vfmadd231pd	%zmm16, %zmm17, %zmm10 {%k1} # zmm10 {%k1} = (zmm17 * zmm16) + zmm10| [2319]
 | 
				
			||||||
 | 
					2315 |  4.0 | vfmadd231pd	%zmm16, %zmm19, %zmm15 {%k1} # zmm15 {%k1} = (zmm19 * zmm16) + zmm15| [2315]
 | 
				
			||||||
 | 
					2296 |  4.0 | vfmadd231pd	%zmm18, %zmm31, %zmm7 {%k1} # zmm7 {%k1} = (zmm31 * zmm18) + zmm7| [2296]
 | 
				
			||||||
 | 
					2294 |  4.0 | vfmadd231pd	%zmm18, %zmm16, %zmm11 {%k1} # zmm11 {%k1} = (zmm16 * zmm18) + zmm11| [2294]
 | 
				
			||||||
 | 
					2290 |  4.0 | vfmadd231pd	%zmm18, %zmm3, %zmm14 {%k1} # zmm14 {%k1} = (zmm3 * zmm18) + zmm14| [2290]
 | 
				
			||||||
 | 
					2330 |  3.0 | shlb	$6, %dil                       | [2330, 2331, 2332]
 | 
				
			||||||
 | 
					2364 |  1.0 | incq	%rbx                           | [2364]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
							
								
								
									
										116
									
								
								static_analysis/jan/analyses/gromacs-icx-avx512-sp-osaca-icx.out
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										116
									
								
								static_analysis/jan/analyses/gromacs-icx-avx512-sp-osaca-icx.out
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,116 @@
 | 
				
			|||||||
 | 
					Open Source Architecture Code Analyzer (OSACA) - 0.4.12
 | 
				
			||||||
 | 
					Analyzed file:      gromacs-icx-avx512-sp.s
 | 
				
			||||||
 | 
					Architecture:       ICX
 | 
				
			||||||
 | 
					Timestamp:          2023-02-14 12:51:43
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					 P - Throughput of LOAD operation can be hidden behind a past or future STORE instruction
 | 
				
			||||||
 | 
					 * - Instruction micro-ops not bound to a port
 | 
				
			||||||
 | 
					 X - No throughput/latency information for this instruction in data file
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Combined Analysis Report
 | 
				
			||||||
 | 
					------------------------
 | 
				
			||||||
 | 
					                                                 Port pressure in cycles                                                  
 | 
				
			||||||
 | 
					     |  0   - 0DV  |  1   - 1DV  |  2   -  2D  |  3   -  3D  |  4   |   5   |  6   |  7   |  8   |  9   ||  CP  | LCD  |
 | 
				
			||||||
 | 
					------------------------------------------------------------------------------------------------------------------------
 | 
				
			||||||
 | 
					1338 |             |             |             |             |      |       |      |      |      |      ||      |      |   # pointer_increment=64 0f91ac4f7fe1a70d0c899f7f3e745649
 | 
				
			||||||
 | 
					1339 |             |             |             |             |      |       |      |      |      |      ||      |      |   # LLVM-MCA-BEGIN
 | 
				
			||||||
 | 
					1340 |             |             |             |             |      |       |      |      |      |      ||      |      |   .LBB2_12:                               #   Parent Loop BB2_7 Depth=1
 | 
				
			||||||
 | 
					1341 |             |             |             |             |      |       |      |      |      |      ||      |      |   # =>  This Inner Loop Header: Depth=2
 | 
				
			||||||
 | 
					1342 |             |             | 0.50   0.50 | 0.50   0.50 |      |       |      |      |      |      ||  5.0 |      |   movslq (%r11,%rax,4), %rcx
 | 
				
			||||||
 | 
					1343 | 0.00        | 0.75        |             |             |      | 0.000 | 0.25 |      |      |      ||  6.0 |      |   leaq (%rcx,%rcx,2), %rdx
 | 
				
			||||||
 | 
					1344 | 0.00        |             |             |             |      |       | 1.00 |      |      |      ||  1.0 |      |   shlq $5, %rdx
 | 
				
			||||||
 | 
					1345 |             |             | 0.50   0.50 | 0.50   0.50 |      |       |      |      |      |      ||      |      |   vmovupd (%rsi,%rdx), %zmm16
 | 
				
			||||||
 | 
					1346 |             |             | 0.50   0.50 | 0.50   0.50 |      |       |      |      |      |      ||  5.0 |      |   vbroadcastf64x4 64(%rsi,%rdx), %zmm20   # zmm20 = mem[0,1,2,3,0,1,2,3]
 | 
				
			||||||
 | 
					1347 |             |             | 0.50   0.50 | 0.50   0.50 |      |       |      |      |      |      ||      |      |   vbroadcastf64x4 (%rsi,%rdx), %zmm19     # zmm19 = mem[0,1,2,3,0,1,2,3]
 | 
				
			||||||
 | 
					1348 |             |             |             |             |      | 1.000 |      |      |      |      ||      |      |   vshuff64x2 $238, %zmm16, %zmm16, %zmm21 # zmm21 = zmm16[4,5,6,7,4,5,6,7]
 | 
				
			||||||
 | 
					1349 | 0.50        |             |             |             |      | 0.500 |      |      |      |      ||      |      |   vsubps %zmm19, %zmm6, %zmm18
 | 
				
			||||||
 | 
					1350 | 0.50        |             |             |             |      | 0.500 |      |      |      |      ||      |      |   vsubps %zmm21, %zmm10, %zmm17
 | 
				
			||||||
 | 
					1351 | 0.50        |             |             |             |      | 0.500 |      |      |      |      ||  4.0 |      |   vsubps %zmm20, %zmm14, %zmm16
 | 
				
			||||||
 | 
					1352 | 0.50        |             |             |             |      | 0.500 |      |      |      |      ||  4.0 |      |   vmulps %zmm16, %zmm16, %zmm22
 | 
				
			||||||
 | 
					1353 | 0.50        |             |             |             |      | 0.500 |      |      |      |      ||  4.0 |      |   vfmadd231ps %zmm17, %zmm17, %zmm22  # zmm22 = (zmm17 * zmm17) + zmm22
 | 
				
			||||||
 | 
					1354 | 0.50        |             |             |             |      | 0.500 |      |      |      |      ||  4.0 |      |   vfmadd231ps %zmm18, %zmm18, %zmm22  # zmm22 = (zmm18 * zmm18) + zmm22
 | 
				
			||||||
 | 
					1355 | 2.50        |             |             |             |      | 0.500 |      |      |      |      ||  6.0 |      |   vrcp14ps %zmm22, %zmm23
 | 
				
			||||||
 | 
					1356 | 0.50        |             |             |             |      | 0.500 |      |      |      |      ||  4.0 |      |   vmulps %zmm23, %zmm26, %zmm24
 | 
				
			||||||
 | 
					1357 | 0.50        |             |             |             |      | 0.500 |      |      |      |      ||  4.0 |      |   vmulps %zmm24, %zmm23, %zmm24
 | 
				
			||||||
 | 
					1358 | 0.50        |             |             |             |      | 0.500 |      |      |      |      ||  4.0 |      |   vmulps %zmm24, %zmm23, %zmm24
 | 
				
			||||||
 | 
					1359 | 0.75        |             |             |             |      | 0.250 |      |      |      |      ||  4.0 |      |   vaddps %zmm1, %zmm24, %zmm25
 | 
				
			||||||
 | 
					1360 | 1.00        |             |             |             |      | 0.000 |      |      |      |      ||      |      |   vmulps %zmm23, %zmm27, %zmm23
 | 
				
			||||||
 | 
					1361 | 1.00        |             |             |             |      | 0.000 |      |      |      |      ||  4.0 |      |   vmulps %zmm25, %zmm23, %zmm23
 | 
				
			||||||
 | 
					1362 | 1.00        |             |             |             |      | 0.000 |      |      |      |      ||  4.0 |      |   vmulps %zmm23, %zmm24, %zmm23
 | 
				
			||||||
 | 
					1363 | 0.00        | 0.50        |             |             |      | 0.000 | 0.50 |      |      |      ||      |      |   leal (%rcx,%rcx), %edx
 | 
				
			||||||
 | 
					1364 | 0.00        | 0.50        |             |             |      | 0.000 | 0.50 |      |      |      ||      |      |   xorl %edi, %edi
 | 
				
			||||||
 | 
					1365 | 0.00        | 0.50        |             |             |      | 0.000 | 0.50 |      |      |      ||      |      |   xorl %ebp, %ebp
 | 
				
			||||||
 | 
					1366 | 0.00        | 0.75        |             |             |      | 0.000 | 0.25 |      |      |      ||      |      |   cmpq %rdx, %r12
 | 
				
			||||||
 | 
					1367 | 0.00        |             |             |             |      |       | 1.00 |      |      |      ||      |      |   setne %dil
 | 
				
			||||||
 | 
					1368 | 0.00        | 0.75        |             |             |      | 0.000 | 0.25 |      |      |      ||      |      |   leal 1(%rcx,%rcx), %ecx
 | 
				
			||||||
 | 
					1369 | 0.00        |             |             |             |      |       | 1.00 |      |      |      ||      |      |   sete %bpl
 | 
				
			||||||
 | 
					1370 | 0.00        | 0.50        |             |             |      | 0.000 | 0.50 |      |      |      ||      |      |   xorl %edx, %edx
 | 
				
			||||||
 | 
					1371 | 0.00        | 0.50        |             |             |      | 0.000 | 0.50 |      |      |      ||      |      |   xorl %ebx, %ebx
 | 
				
			||||||
 | 
					1372 | 0.00        | 0.75        |             |             |      | 0.000 | 0.25 |      |      |      ||      |      |   cmpq %rcx, %r12
 | 
				
			||||||
 | 
					1373 | 0.00        |             |             |             |      |       | 1.00 |      |      |      ||      |      |   sete %dl
 | 
				
			||||||
 | 
					1374 | 0.00        | 1.00        |             |             |      | 0.000 | 0.00 |      |      |      ||      |      |   movl $0, %ecx
 | 
				
			||||||
 | 
					1375 | 0.00        |             |             |             |      |       | 1.00 |      |      |      ||      |      |   setne %bl
 | 
				
			||||||
 | 
					1376 | 0.00        |             |             |             |      |       | 1.00 |      |      |      ||      |      |   cmovel %r8d, %ecx
 | 
				
			||||||
 | 
					1377 | 0.00        | 0.75        |             |             |      | 0.000 | 0.25 |      |      |      ||      |      |   movl %ebx, %r14d
 | 
				
			||||||
 | 
					1378 | 0.00        |             |             |             |      |       | 1.00 |      |      |      ||      |      |   shll $4, %r14d
 | 
				
			||||||
 | 
					1379 | 0.00        | 0.50        |             |             |      | 0.000 | 0.50 |      |      |      ||      |      |   subl %ebp, %r14d
 | 
				
			||||||
 | 
					1380 | 0.00        | 0.75        |             |             |      | 0.000 | 0.25 |      |      |      ||      |      |   leal (%rcx,%rdi,2), %ecx
 | 
				
			||||||
 | 
					1381 | 0.00        |             |             |             |      |       | 1.00 |      |      |      ||      |      |   shll $8, %ecx
 | 
				
			||||||
 | 
					1382 | 0.00        | 0.50        |             |             |      | 0.000 | 0.50 |      |      |      ||      |      |   addl $239, %r14d
 | 
				
			||||||
 | 
					1383 | 0.00        | 0.50        |             |             |      | 0.000 | 0.50 |      |      |      ||      |      |   addl $-768, %ecx                     # imm = 0xFD00
 | 
				
			||||||
 | 
					1384 | 0.00        | 1.00        |             |             |      | 0.000 | 0.00 |      |      |      ||      |      |   orl %r14d, %ecx
 | 
				
			||||||
 | 
					1385 | 1.00        |             |             |             |      |       |      |      |      |      ||      |      |   kmovd %ecx, %k2
 | 
				
			||||||
 | 
					1386 | 0.50        |             |             |             |      | 0.500 |      |      |      |      ||      |      |   vcmpltps %zmm0, %zmm22, %k2 {%k2}
 | 
				
			||||||
 | 
					1387 | 0.50        |             |             |             |      | 0.500 |      |      |      |      ||      |      |   vsubps %zmm21, %zmm11, %zmm21
 | 
				
			||||||
 | 
					1388 | 0.50        |             |             |             |      | 0.500 |      |      |      |      ||      |      |   vsubps %zmm20, %zmm15, %zmm20
 | 
				
			||||||
 | 
					1389 | 0.50        |             |             |             |      | 0.500 |      |      |      |      ||      |      |   vsubps %zmm19, %zmm7, %zmm19
 | 
				
			||||||
 | 
					1390 | 0.50        |             |             |             |      | 0.500 |      |      |      |      ||  4.0 |      |   vmulps %zmm2, %zmm23, %zmm22
 | 
				
			||||||
 | 
					1391 | 0.50        |             |             |             |      | 0.500 |      |      |      |      ||  4.0 |      |   vfmadd231ps %zmm22, %zmm18, %zmm12 {%k2} # zmm12 {%k2} = (zmm18 * zmm22) + zmm12
 | 
				
			||||||
 | 
					1392 | 0.50        |             |             |             |      | 0.500 |      |      |      |      ||      |      |   vmulps %zmm20, %zmm20, %zmm18
 | 
				
			||||||
 | 
					1393 | 0.50        |             |             |             |      | 0.500 |      |      |      |      ||      |      |   vfmadd231ps %zmm21, %zmm21, %zmm18  # zmm18 = (zmm21 * zmm21) + zmm18
 | 
				
			||||||
 | 
					1394 | 0.50        |             |             |             |      | 0.500 |      |      |      |      ||      |      |   vfmadd231ps %zmm19, %zmm19, %zmm18  # zmm18 = (zmm19 * zmm19) + zmm18
 | 
				
			||||||
 | 
					1395 | 0.50        |             |             |             |      | 0.500 |      |      |      |      ||      |      |   vfmadd231ps %zmm22, %zmm17, %zmm9 {%k2} # zmm9 {%k2} = (zmm17 * zmm22) + zmm9
 | 
				
			||||||
 | 
					1396 | 2.50        |             |             |             |      | 0.500 |      |      |      |      ||      |      |   vrcp14ps %zmm18, %zmm17
 | 
				
			||||||
 | 
					1397 | 0.50        |             |             |             |      | 0.500 |      |      |      |      ||      |      |   vfmadd231ps %zmm22, %zmm16, %zmm5 {%k2} # zmm5 {%k2} = (zmm16 * zmm22) + zmm5
 | 
				
			||||||
 | 
					1398 | 0.50        |             |             |             |      | 0.500 |      |      |      |      ||      |      |   vmulps %zmm17, %zmm26, %zmm16
 | 
				
			||||||
 | 
					1399 | 0.50        |             |             |             |      | 0.500 |      |      |      |      ||      |      |   vmulps %zmm16, %zmm17, %zmm16
 | 
				
			||||||
 | 
					1400 | 0.50        |             |             |             |      | 0.500 |      |      |      |      ||      |      |   vmulps %zmm16, %zmm17, %zmm16
 | 
				
			||||||
 | 
					1401 | 0.00        |             |             |             |      | 1.000 |      |      |      |      ||      |      |   vaddps %zmm1, %zmm16, %zmm22
 | 
				
			||||||
 | 
					1402 | 0.00        |             |             |             |      | 1.000 |      |      |      |      ||      |      |   vmulps %zmm17, %zmm27, %zmm17
 | 
				
			||||||
 | 
					1403 | 0.00        |             |             |             |      | 1.000 |      |      |      |      ||      |      |   vmulps %zmm22, %zmm17, %zmm17
 | 
				
			||||||
 | 
					1404 | 0.00        |             |             |             |      | 1.000 |      |      |      |      ||      |      |   vmulps %zmm17, %zmm16, %zmm16
 | 
				
			||||||
 | 
					1405 | 0.00        |             |             |             |      |       | 1.00 |      |      |      ||      |      |   shll $6, %ebx
 | 
				
			||||||
 | 
					1406 | 0.00        | 1.00        |             |             |      | 0.000 | 0.00 |      |      |      ||      |      |   leal (%rbx,%rdi,4), %ecx
 | 
				
			||||||
 | 
					1407 | 0.00        |             |             |             |      |       | 1.00 |      |      |      ||      |      |   shll $7, %edx
 | 
				
			||||||
 | 
					1408 | 0.00        | 1.00        |             |             |      | 0.000 | 0.00 |      |      |      ||      |      |   leal (%rdx,%rdi,8), %edx
 | 
				
			||||||
 | 
					1409 | 0.00        |             |             |             |      |       | 1.00 |      |      |      ||      |      |   shll $8, %edx
 | 
				
			||||||
 | 
					1410 | 0.00        | 1.00        |             |             |      | 0.000 | 0.00 |      |      |      ||      |      |   addl %edx, %ecx
 | 
				
			||||||
 | 
					1411 | 0.00        | 1.00        |             |             |      | 0.000 | 0.00 |      |      |      ||      |      |   addl $-2117, %ecx                    # imm = 0xF7BB
 | 
				
			||||||
 | 
					1412 | 1.00        |             |             |             |      |       |      |      |      |      ||      |      |   kmovd %ecx, %k2
 | 
				
			||||||
 | 
					1413 | 0.00        |             |             |             |      | 1.000 |      |      |      |      ||      |      |   vcmpltps %zmm0, %zmm18, %k2 {%k2}
 | 
				
			||||||
 | 
					1414 | 0.00        |             |             |             |      | 1.000 |      |      |      |      ||      |      |   vmulps %zmm2, %zmm16, %zmm16
 | 
				
			||||||
 | 
					1415 | 0.00        |             |             |             |      | 1.000 |      |      |      |      ||      |      |   vfmadd231ps %zmm16, %zmm19, %zmm13 {%k2} # zmm13 {%k2} = (zmm19 * zmm16) + zmm13
 | 
				
			||||||
 | 
					1416 | 0.00        |             |             |             |      | 1.000 |      |      |      |      ||      |      |   vfmadd231ps %zmm16, %zmm21, %zmm8 {%k2} # zmm8 {%k2} = (zmm21 * zmm16) + zmm8
 | 
				
			||||||
 | 
					1417 | 0.24        |             |             |             |      | 0.760 |      |      |      |      ||      |  4.0 |   vfmadd231ps %zmm16, %zmm20, %zmm4 {%k2} # zmm4 {%k2} = (zmm20 * zmm16) + zmm4
 | 
				
			||||||
 | 
					1418 | 0.00        | 1.00        |             |             |      | -0.01 | 0.00 |      |      |      ||      |      |   incq %rax
 | 
				
			||||||
 | 
					1419 | 0.00        | 1.00        |             |             |      | -0.01 | 0.00 |      |      |      ||      |      |   cmpq %rax, %r10
 | 
				
			||||||
 | 
					1420 |             |             |             |             |      |       |      |      |      |      ||      |      | * jne .LBB2_12
 | 
				
			||||||
 | 
					1421 |             |             |             |             |      |       |      |      |      |      ||      |      |   # LLVM-MCA-END
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					       22.5          16.5          2.00   2.00   2.00   2.00          22.49   16.5                           71    4.0  
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Loop-Carried Dependencies Analysis Report
 | 
				
			||||||
 | 
					-----------------------------------------
 | 
				
			||||||
 | 
					1417 |  4.0 | vfmadd231ps	%zmm16, %zmm20, %zmm4 {%k2} # zmm4 {%k2} = (zmm20 * zmm16) + zmm4| [1417]
 | 
				
			||||||
 | 
					1416 |  4.0 | vfmadd231ps	%zmm16, %zmm21, %zmm8 {%k2} # zmm8 {%k2} = (zmm21 * zmm16) + zmm8| [1416]
 | 
				
			||||||
 | 
					1415 |  4.0 | vfmadd231ps	%zmm16, %zmm19, %zmm13 {%k2} # zmm13 {%k2} = (zmm19 * zmm16) + zmm13| [1415]
 | 
				
			||||||
 | 
					1397 |  4.0 | vfmadd231ps	%zmm22, %zmm16, %zmm5 {%k2} # zmm5 {%k2} = (zmm16 * zmm22) + zmm5| [1397]
 | 
				
			||||||
 | 
					1395 |  4.0 | vfmadd231ps	%zmm22, %zmm17, %zmm9 {%k2} # zmm9 {%k2} = (zmm17 * zmm22) + zmm9| [1395]
 | 
				
			||||||
 | 
					1391 |  4.0 | vfmadd231ps	%zmm22, %zmm18, %zmm12 {%k2} # zmm12 {%k2} = (zmm18 * zmm22) + zmm12| [1391]
 | 
				
			||||||
 | 
					1418 |  1.0 | incq	%rax                           | [1418]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
										
											Binary file not shown.
										
									
								
							@@ -1331,6 +1331,12 @@ computeForceLJ_2xnn_full:               #
 | 
				
			|||||||
	vxorps	%xmm8, %xmm8, %xmm8
 | 
						vxorps	%xmm8, %xmm8, %xmm8
 | 
				
			||||||
	vxorps	%xmm4, %xmm4, %xmm4
 | 
						vxorps	%xmm4, %xmm4, %xmm4
 | 
				
			||||||
	.p2align	4, 0x90
 | 
						.p2align	4, 0x90
 | 
				
			||||||
 | 
					movl      $111, %ebx # OSACA START MARKER
 | 
				
			||||||
 | 
					.byte     100        # OSACA START MARKER
 | 
				
			||||||
 | 
					.byte     103        # OSACA START MARKER
 | 
				
			||||||
 | 
					.byte     144        # OSACA START MARKER
 | 
				
			||||||
 | 
					# pointer_increment=64 0f91ac4f7fe1a70d0c899f7f3e745649
 | 
				
			||||||
 | 
					# LLVM-MCA-BEGIN
 | 
				
			||||||
.LBB2_12:                               #   Parent Loop BB2_7 Depth=1
 | 
					.LBB2_12:                               #   Parent Loop BB2_7 Depth=1
 | 
				
			||||||
                                        # =>  This Inner Loop Header: Depth=2
 | 
					                                        # =>  This Inner Loop Header: Depth=2
 | 
				
			||||||
	movslq	(%r11,%rax,4), %rcx
 | 
						movslq	(%r11,%rax,4), %rcx
 | 
				
			||||||
@@ -1412,6 +1418,11 @@ computeForceLJ_2xnn_full:               #
 | 
				
			|||||||
	incq	%rax
 | 
						incq	%rax
 | 
				
			||||||
	cmpq	%rax, %r10
 | 
						cmpq	%rax, %r10
 | 
				
			||||||
	jne	.LBB2_12
 | 
						jne	.LBB2_12
 | 
				
			||||||
 | 
					# LLVM-MCA-END
 | 
				
			||||||
 | 
					movl      $222, %ebx # OSACA END MARKER
 | 
				
			||||||
 | 
					.byte     100        # OSACA END MARKER
 | 
				
			||||||
 | 
					.byte     103        # OSACA END MARKER
 | 
				
			||||||
 | 
					.byte     144        # OSACA END MARKER
 | 
				
			||||||
# %bb.13:                               #   in Loop: Header=BB2_7 Depth=1
 | 
					# %bb.13:                               #   in Loop: Header=BB2_7 Depth=1
 | 
				
			||||||
	movq	%r15, %r14
 | 
						movq	%r15, %r14
 | 
				
			||||||
	movq	8(%rsp), %rbp                   # 8-byte Reload
 | 
						movq	8(%rsp), %rbp                   # 8-byte Reload
 | 
				
			||||||
@@ -1655,12 +1666,6 @@ computeForceLJ_4xn_half:                #
 | 
				
			|||||||
	vmovups	64(%rsp), %zmm6                 # 64-byte Reload
 | 
						vmovups	64(%rsp), %zmm6                 # 64-byte Reload
 | 
				
			||||||
	vmovups	512(%rsp), %zmm7                # 64-byte Reload
 | 
						vmovups	512(%rsp), %zmm7                # 64-byte Reload
 | 
				
			||||||
	.p2align	4, 0x90
 | 
						.p2align	4, 0x90
 | 
				
			||||||
movl      $111, %ebx # OSACA START MARKER
 | 
					 | 
				
			||||||
.byte     100        # OSACA START MARKER
 | 
					 | 
				
			||||||
.byte     103        # OSACA START MARKER
 | 
					 | 
				
			||||||
.byte     144        # OSACA START MARKER
 | 
					 | 
				
			||||||
# pointer_increment=64 0f91ac4f7fe1a70d0c899f7f3e745649
 | 
					 | 
				
			||||||
# LLVM-MCA-BEGIN
 | 
					 | 
				
			||||||
.LBB4_8:                                # =>This Inner Loop Header: Depth=1
 | 
					.LBB4_8:                                # =>This Inner Loop Header: Depth=1
 | 
				
			||||||
	movslq	(%r11,%rdx,4), %rax
 | 
						movslq	(%r11,%rdx,4), %rax
 | 
				
			||||||
	movq	%rax, %rsi
 | 
						movq	%rax, %rsi
 | 
				
			||||||
@@ -1793,11 +1798,6 @@ movl      $111, %ebx # OSACA START MARKER
 | 
				
			|||||||
	movq	160(%r15), %rdi
 | 
						movq	160(%r15), %rdi
 | 
				
			||||||
	incq	%rdx
 | 
						incq	%rdx
 | 
				
			||||||
	jmp	.LBB4_8
 | 
						jmp	.LBB4_8
 | 
				
			||||||
# LLVM-MCA-END
 | 
					 | 
				
			||||||
movl      $222, %ebx # OSACA END MARKER
 | 
					 | 
				
			||||||
.byte     100        # OSACA END MARKER
 | 
					 | 
				
			||||||
.byte     103        # OSACA END MARKER
 | 
					 | 
				
			||||||
.byte     144        # OSACA END MARKER
 | 
					 | 
				
			||||||
	.p2align	5, 0x90
 | 
						.p2align	5, 0x90
 | 
				
			||||||
.LBB4_18:
 | 
					.LBB4_18:
 | 
				
			||||||
	vzeroupper
 | 
						vzeroupper
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user