iwia021h@testfront1:~/MD-Bench/asm$ /home/hpc/iwia/iwia021h/.local/bin/osaca --ignore-unknown --arch=CSX force_aos_lt8_markers.s Open Source Architecture Code Analyzer (OSACA) - 0.3.14 Analyzed file: force_aos_lt8_markers.s Architecture: CSX Timestamp: 2021-04-29 15:49:27 P - Throughput of LOAD operation can be hidden behind a past or future STORE instruction * - Instruction micro-ops not bound to a port X - No throughput/latency information for this instruction in data file Combined Analysis Report ------------------------ Port pressure in cycles | 0 - 0DV | 1 | 2 - 2D | 3 - 3D | 4 | 5 | 6 | 7 || CP | LCD | ------------------------------------------------------------------------------------------------- 358 | | | | | | | | || | | # LOE rax rdx rcx rbp rsi rdi r8 r9 r10 r11d r14d xmm6 xmm7 xmm12 ymm15 ymm16 zmm5 zmm8 zmm9 zmm10 zmm11 zmm13 zmm14 359 | | | | | | | | || | | ..B1.33: # Preds ..B1.32 360 | | | | | | | | || | | # Execution count [2.50e+01] 361 | | 1.00 | | | | | | || 3.0 | | imulq %r8, %rcx #56.43 362 | | | | | | 1.00 | | || | 3.0 | vbroadcastsd %xmm6, %zmm4 #58.23 363 | | | | | | | | || | | X subl %r14d, %r11d #67.9 364 | 0.00 | 0.00 | | | | 0.00 | 1.00 | || 1.0 | | addq %r10, %rcx #37.5 365 | | | | | | | | || | | X vpbroadcastd %r11d, %ymm0 #67.9 366 | | | | | | 1.00 | | || | | vpcmpgtd %ymm15, %ymm0, %k3 #67.9 367 | 0.00 | 0.00 | | | | 0.00 | 1.00 | || | | movslq %r14d, %r14 #67.9 368 | 1.00 | | | | | | | || | | kmovw %k3, %ebx #67.9 369 | 0.00 | 1.00 | 0.50 0.50 | 0.50 0.50 | | 0.00 | | || 0.0 | | vmovdqu32 (%rcx,%r14,4), %ymm1{%k3}{z} #68.21 370 | 0.00 | 1.00 | | | | 0.00 | | || 1.0 | | vpaddd %ymm1, %ymm1, %ymm2 #69.36 371 | 0.00 | 1.00 | | | | 0.00 | | || 1.0 | | vpaddd %ymm2, %ymm1, %ymm0 #69.36 372 | | | | | | | | || | | # LOE rax rdx rbp rsi rdi r8 r9 r10 ebx xmm7 xmm12 ymm0 ymm15 ymm16 zmm4 zmm5 zmm8 zmm9 zmm10 zmm11 zmm13 zmm14 k3 373 | | | | | | | | || | | ..B1.36: # Preds ..B1.33 374 | | | | | | | | || | | # Execution count [1.25e+01] 375 | 1.00 | | | | | | | || | | kmovw %k3, %k1 #69.36 376 | 1.00 | | | | | | | || | | kmovw %k3, %k2 #69.36 377 | 0.50 | | | | | 0.50 | | || | | vpxord %zmm1, %zmm1, %zmm1 #69.36 378 | 0.50 | | | | | 0.50 | | || | | vpxord %zmm2, %zmm2, %zmm2 #69.36 379 | 0.50 | | | | | 0.50 | | || | | vpxord %zmm3, %zmm3, %zmm3 #69.36 380 | 1.50 | 0.50 | 4.00 0.50 | 4.00 0.50 | | 0.50 | 0.50 | || | | vgatherdpd 16(%rdi,%ymm0,8), %zmm1{%k1} #69.36 381 | 1.50 | 0.50 | 4.00 0.50 | 4.00 0.50 | | 0.50 | 0.50 | || 4.0 | | vgatherdpd 8(%rdi,%ymm0,8), %zmm2{%k2} #69.36 382 | 1.50 | 0.00 | 4.00 0.50 | 4.00 0.50 | | 0.50 | 1.00 | || | | vgatherdpd (%rdi,%ymm0,8), %zmm3{%k3} #69.36 383 | | | | | | | | || | | # LOE rax rdx rbp rsi rdi r8 r9 r10 ebx xmm7 xmm12 ymm15 ymm16 zmm1 zmm2 zmm3 zmm4 zmm5 zmm8 zmm9 zmm10 zmm11 zmm13 zmm14 384 | | | | | | | | || | | ..B1.37: # Preds ..B1.36 385 | | | | | | | | || | | # Execution count [2.50e+01] 386 | | | | | | 1.00 | | || | | vbroadcastsd %xmm7, %zmm7 #59.23 387 | | | | | | 1.00 | | || | | vbroadcastsd %xmm12, %zmm12 #60.23 388 | 0.50 | | | | | 0.50 | | || | | vsubpd %zmm1, %zmm12, %zmm23 #71.36 389 | 0.50 | | | | | 0.50 | | || 4.0 | | vsubpd %zmm2, %zmm7, %zmm21 #70.36 390 | 0.50 | | | | | 0.50 | | || | 4.0 | vsubpd %zmm3, %zmm4, %zmm20 #69.36 391 | 0.50 | | | | | 0.50 | | || 4.0 | | vmulpd %zmm21, %zmm21, %zmm19 #72.49 392 | 0.50 | | | | | 0.50 | | || 4.0 | 4.0 | vfmadd231pd %zmm20, %zmm20, %zmm19 #72.49 393 | 0.50 | | | | | 0.50 | | || 4.0 | 4.0 | vfmadd231pd %zmm23, %zmm23, %zmm19 #72.63 394 | 2.50 | | | | | 0.50 | | || 8.0 | 8.0 | vrcp14pd %zmm19, %zmm18 #75.38 395 | | | | | | 1.00 | | || | | vcmppd $1, %zmm14, %zmm19, %k2 #74.22 396 | | | | | | 1.00 | | || | | vfpclasspd $30, %zmm18, %k0 #75.38 397 | 1.00 | | | | | | | || | | kmovw %k2, %ecx #74.22 398 | 1.00 | | | | | | | || | | knotw %k0, %k1 #75.38 399 | | | | | | | | || | | * vmovaps %zmm19, %zmm0 #75.38 400 | 0.00 | 0.00 | | | | 0.00 | 1.00 | || | | andl %ecx, %ebx #74.22 401 | 0.50 | | 0.50 0.50 | 0.50 0.50 | | 0.50 | | || 4.0 | | vfnmadd213pd .L_2il0floatpacket.9(%rip){1to8}, %zmm18, %zmm0 #75.38 402 | 1.00 | | | | | | | || | | kmovw %ebx, %k3 #78.17 403 | 0.50 | | | | | 0.50 | | || 4.0 | | vmulpd %zmm0, %zmm0, %zmm1 #75.38 404 | 0.50 | | | | | 0.50 | | || | 4.0 | vfmadd213pd %zmm18, %zmm0, %zmm18{%k1} #75.38 405 | 0.50 | | | | | 0.50 | | || 4.0 | 4.0 | vfmadd213pd %zmm18, %zmm1, %zmm18{%k1} #75.38 406 | 0.50 | | | | | 0.50 | | || 4.0 | | vmulpd %zmm13, %zmm18, %zmm2 #76.38 407 | 0.50 | | | | | 0.50 | | || | | vmulpd %zmm10, %zmm18, %zmm4 #77.54 408 | 0.50 | | | | | 0.50 | | || 4.0 | 4.0 | vmulpd %zmm2, %zmm18, %zmm6 #76.44 409 | 0.50 | | | | | 0.50 | | || 4.0 | | vmulpd %zmm6, %zmm18, %zmm3 #76.50 410 | 0.50 | | | | | 0.50 | | || | | vfmsub213pd %zmm5, %zmm6, %zmm18 #77.54 411 | 0.00 | | | | | 1.00 | | || 4.0 | | vmulpd %zmm4, %zmm3, %zmm17 #77.61 412 | 0.00 | | | | | 1.00 | | || 4.0 | | vmulpd %zmm18, %zmm17, %zmm22 #77.67 413 | 0.00 | | | | | 1.00 | | || | | vfmadd231pd %zmm20, %zmm22, %zmm9{%k3} #78.17 414 | 0.00 | | | | | 1.00 | | || | | vfmadd231pd %zmm21, %zmm22, %zmm8{%k3} #79.17 415 | 0.00 | | | | | 1.00 | | || 4.0 | | vfmadd231pd %zmm23, %zmm22, %zmm11{%k3} #80.17 22.0 5.00 13.0 2.50 13.0 2.50 22.0 5.00 70.0 35.0 Loop-Carried Dependencies Analysis Report ----------------------------------------- 363 | 0.0 | subl %r14d, %r11d #67.9| [363] 367 | 1.0 | movslq %r14d, %r14 #67.9| [367] 386 | 3.0 | vbroadcastsd %xmm7, %zmm7 #59.23| [386] 387 | 3.0 | vbroadcastsd %xmm12, %zmm12 #60.23| [387] 415 | 4.0 | vfmadd231pd %zmm23, %zmm22, %zmm11{%k3} #80.17| [415] 414 | 4.0 | vfmadd231pd %zmm21, %zmm22, %zmm8{%k3} #79.17| [414] 413 | 4.0 | vfmadd231pd %zmm20, %zmm22, %zmm9{%k3} #78.17| [413] 397 | 28.0 | kmovw %k2, %ecx #74.22| [361, 364, 369, 371, 382, 390, 392, 393, 395, 397] 408 | 35.0 | vmulpd %zmm2, %zmm18, %zmm6 #76.44| [362, 390, 392, 393, 394, 404, 405, 408]