11b2d4bcc1
Signed-off-by: Rafael Ravedutti <rafaelravedutti@gmail.com>
92 lines
11 KiB
Plaintext
92 lines
11 KiB
Plaintext
iwia021h@testfront1:~/MD-Bench/asm$ /home/hpc/iwia/iwia021h/.local/bin/osaca --ignore-unknown --arch=CSX force_aos_lt8_markers.s
|
|
Open Source Architecture Code Analyzer (OSACA) - 0.3.14
|
|
Analyzed file: force_aos_lt8_markers.s
|
|
Architecture: CSX
|
|
Timestamp: 2021-04-29 15:49:27
|
|
|
|
|
|
P - Throughput of LOAD operation can be hidden behind a past or future STORE instruction
|
|
* - Instruction micro-ops not bound to a port
|
|
X - No throughput/latency information for this instruction in data file
|
|
|
|
|
|
Combined Analysis Report
|
|
------------------------
|
|
Port pressure in cycles
|
|
| 0 - 0DV | 1 | 2 - 2D | 3 - 3D | 4 | 5 | 6 | 7 || CP | LCD |
|
|
-------------------------------------------------------------------------------------------------
|
|
358 | | | | | | | | || | | # LOE rax rdx rcx rbp rsi rdi r8 r9 r10 r11d r14d xmm6 xmm7 xmm12 ymm15 ymm16 zmm5 zmm8 zmm9 zmm10 zmm11 zmm13 zmm14
|
|
359 | | | | | | | | || | | ..B1.33: # Preds ..B1.32
|
|
360 | | | | | | | | || | | # Execution count [2.50e+01]
|
|
361 | | 1.00 | | | | | | || 3.0 | | imulq %r8, %rcx #56.43
|
|
362 | | | | | | 1.00 | | || | 3.0 | vbroadcastsd %xmm6, %zmm4 #58.23
|
|
363 | | | | | | | | || | | X subl %r14d, %r11d #67.9
|
|
364 | 0.00 | 0.00 | | | | 0.00 | 1.00 | || 1.0 | | addq %r10, %rcx #37.5
|
|
365 | | | | | | | | || | | X vpbroadcastd %r11d, %ymm0 #67.9
|
|
366 | | | | | | 1.00 | | || | | vpcmpgtd %ymm15, %ymm0, %k3 #67.9
|
|
367 | 0.00 | 0.00 | | | | 0.00 | 1.00 | || | | movslq %r14d, %r14 #67.9
|
|
368 | 1.00 | | | | | | | || | | kmovw %k3, %ebx #67.9
|
|
369 | 0.00 | 1.00 | 0.50 0.50 | 0.50 0.50 | | 0.00 | | || 0.0 | | vmovdqu32 (%rcx,%r14,4), %ymm1{%k3}{z} #68.21
|
|
370 | 0.00 | 1.00 | | | | 0.00 | | || 1.0 | | vpaddd %ymm1, %ymm1, %ymm2 #69.36
|
|
371 | 0.00 | 1.00 | | | | 0.00 | | || 1.0 | | vpaddd %ymm2, %ymm1, %ymm0 #69.36
|
|
372 | | | | | | | | || | | # LOE rax rdx rbp rsi rdi r8 r9 r10 ebx xmm7 xmm12 ymm0 ymm15 ymm16 zmm4 zmm5 zmm8 zmm9 zmm10 zmm11 zmm13 zmm14 k3
|
|
373 | | | | | | | | || | | ..B1.36: # Preds ..B1.33
|
|
374 | | | | | | | | || | | # Execution count [1.25e+01]
|
|
375 | 1.00 | | | | | | | || | | kmovw %k3, %k1 #69.36
|
|
376 | 1.00 | | | | | | | || | | kmovw %k3, %k2 #69.36
|
|
377 | 0.50 | | | | | 0.50 | | || | | vpxord %zmm1, %zmm1, %zmm1 #69.36
|
|
378 | 0.50 | | | | | 0.50 | | || | | vpxord %zmm2, %zmm2, %zmm2 #69.36
|
|
379 | 0.50 | | | | | 0.50 | | || | | vpxord %zmm3, %zmm3, %zmm3 #69.36
|
|
380 | 1.50 | 0.50 | 4.00 0.50 | 4.00 0.50 | | 0.50 | 0.50 | || | | vgatherdpd 16(%rdi,%ymm0,8), %zmm1{%k1} #69.36
|
|
381 | 1.50 | 0.50 | 4.00 0.50 | 4.00 0.50 | | 0.50 | 0.50 | || 4.0 | | vgatherdpd 8(%rdi,%ymm0,8), %zmm2{%k2} #69.36
|
|
382 | 1.50 | 0.00 | 4.00 0.50 | 4.00 0.50 | | 0.50 | 1.00 | || | | vgatherdpd (%rdi,%ymm0,8), %zmm3{%k3} #69.36
|
|
383 | | | | | | | | || | | # LOE rax rdx rbp rsi rdi r8 r9 r10 ebx xmm7 xmm12 ymm15 ymm16 zmm1 zmm2 zmm3 zmm4 zmm5 zmm8 zmm9 zmm10 zmm11 zmm13 zmm14
|
|
384 | | | | | | | | || | | ..B1.37: # Preds ..B1.36
|
|
385 | | | | | | | | || | | # Execution count [2.50e+01]
|
|
386 | | | | | | 1.00 | | || | | vbroadcastsd %xmm7, %zmm7 #59.23
|
|
387 | | | | | | 1.00 | | || | | vbroadcastsd %xmm12, %zmm12 #60.23
|
|
388 | 0.50 | | | | | 0.50 | | || | | vsubpd %zmm1, %zmm12, %zmm23 #71.36
|
|
389 | 0.50 | | | | | 0.50 | | || 4.0 | | vsubpd %zmm2, %zmm7, %zmm21 #70.36
|
|
390 | 0.50 | | | | | 0.50 | | || | 4.0 | vsubpd %zmm3, %zmm4, %zmm20 #69.36
|
|
391 | 0.50 | | | | | 0.50 | | || 4.0 | | vmulpd %zmm21, %zmm21, %zmm19 #72.49
|
|
392 | 0.50 | | | | | 0.50 | | || 4.0 | 4.0 | vfmadd231pd %zmm20, %zmm20, %zmm19 #72.49
|
|
393 | 0.50 | | | | | 0.50 | | || 4.0 | 4.0 | vfmadd231pd %zmm23, %zmm23, %zmm19 #72.63
|
|
394 | 2.50 | | | | | 0.50 | | || 8.0 | 8.0 | vrcp14pd %zmm19, %zmm18 #75.38
|
|
395 | | | | | | 1.00 | | || | | vcmppd $1, %zmm14, %zmm19, %k2 #74.22
|
|
396 | | | | | | 1.00 | | || | | vfpclasspd $30, %zmm18, %k0 #75.38
|
|
397 | 1.00 | | | | | | | || | | kmovw %k2, %ecx #74.22
|
|
398 | 1.00 | | | | | | | || | | knotw %k0, %k1 #75.38
|
|
399 | | | | | | | | || | | * vmovaps %zmm19, %zmm0 #75.38
|
|
400 | 0.00 | 0.00 | | | | 0.00 | 1.00 | || | | andl %ecx, %ebx #74.22
|
|
401 | 0.50 | | 0.50 0.50 | 0.50 0.50 | | 0.50 | | || 4.0 | | vfnmadd213pd .L_2il0floatpacket.9(%rip){1to8}, %zmm18, %zmm0 #75.38
|
|
402 | 1.00 | | | | | | | || | | kmovw %ebx, %k3 #78.17
|
|
403 | 0.50 | | | | | 0.50 | | || 4.0 | | vmulpd %zmm0, %zmm0, %zmm1 #75.38
|
|
404 | 0.50 | | | | | 0.50 | | || | 4.0 | vfmadd213pd %zmm18, %zmm0, %zmm18{%k1} #75.38
|
|
405 | 0.50 | | | | | 0.50 | | || 4.0 | 4.0 | vfmadd213pd %zmm18, %zmm1, %zmm18{%k1} #75.38
|
|
406 | 0.50 | | | | | 0.50 | | || 4.0 | | vmulpd %zmm13, %zmm18, %zmm2 #76.38
|
|
407 | 0.50 | | | | | 0.50 | | || | | vmulpd %zmm10, %zmm18, %zmm4 #77.54
|
|
408 | 0.50 | | | | | 0.50 | | || 4.0 | 4.0 | vmulpd %zmm2, %zmm18, %zmm6 #76.44
|
|
409 | 0.50 | | | | | 0.50 | | || 4.0 | | vmulpd %zmm6, %zmm18, %zmm3 #76.50
|
|
410 | 0.50 | | | | | 0.50 | | || | | vfmsub213pd %zmm5, %zmm6, %zmm18 #77.54
|
|
411 | 0.00 | | | | | 1.00 | | || 4.0 | | vmulpd %zmm4, %zmm3, %zmm17 #77.61
|
|
412 | 0.00 | | | | | 1.00 | | || 4.0 | | vmulpd %zmm18, %zmm17, %zmm22 #77.67
|
|
413 | 0.00 | | | | | 1.00 | | || | | vfmadd231pd %zmm20, %zmm22, %zmm9{%k3} #78.17
|
|
414 | 0.00 | | | | | 1.00 | | || | | vfmadd231pd %zmm21, %zmm22, %zmm8{%k3} #79.17
|
|
415 | 0.00 | | | | | 1.00 | | || 4.0 | | vfmadd231pd %zmm23, %zmm22, %zmm11{%k3} #80.17
|
|
|
|
| 22.0 | 5.00 | 13.0 2.50 | 13.0 2.50 | | 22.0 | 5.00 | || 70.0 | 35.0 |
|
|
|
|
|
|
Loop-Carried Dependencies Analysis Report
|
|
-----------------------------------------
|
|
363 | 0.0 | subl %r14d, %r11d #67.9| [363]
|
|
367 | 1.0 | movslq %r14d, %r14 #67.9| [367]
|
|
386 | 3.0 | vbroadcastsd %xmm7, %zmm7 #59.23| [386]
|
|
387 | 3.0 | vbroadcastsd %xmm12, %zmm12 #60.23| [387]
|
|
415 | 4.0 | vfmadd231pd %zmm23, %zmm22, %zmm11{%k3} #80.17| [415]
|
|
414 | 4.0 | vfmadd231pd %zmm21, %zmm22, %zmm8{%k3} #79.17| [414]
|
|
413 | 4.0 | vfmadd231pd %zmm20, %zmm22, %zmm9{%k3} #78.17| [413]
|
|
397 | 28.0 | kmovw %k2, %ecx #74.22| [361, 364, 369, 371, 382, 390, 392, 393, 395, 397]
|
|
408 | 35.0 | vmulpd %zmm2, %zmm18, %zmm6 #76.44| [362, 390, 392, 393, 394, 404, 405, 408]
|
|
|