Clean up and move gather-bench to util folder

This commit is contained in:
2023-08-15 15:21:21 +02:00
parent 151f0c0e6f
commit 19209bdcce
98 changed files with 2104 additions and 38712 deletions

View File

@@ -0,0 +1,63 @@
.intel_syntax noprefix

# -----------------------------------------------------------------------
# gather -- AVX2 gather kernel, loads a[idx[i]] for i = 0 .. N-1
#
# ABI:    System V AMD64, leaf function, no stack usage
# In:     rdi = a    base of the double array that is gathered from
#         rsi = idx  32-bit element indices into a
#         rdx = N    number of indices, read as a signed 64-bit value
#         rcx = t    output array of doubles, only written when the
#                    file is preprocessed with TEST defined
# Out:    no defined return value
# Clobb:  rax, ymm0-ymm12, flags; vzeroupper clears the upper halves
#         of every ymm register before returning
#
# Sixteen indices are consumed per loop iteration, so the trip count is
# N rounded up to a multiple of 16 and idx (plus t in TEST builds) must
# be readable/writable up to that rounded size. N <= 0 returns at once.
# -----------------------------------------------------------------------
.text
.globl gather
.type gather, @function
gather:
        test    rdx, rdx
        jle     2f                              # nothing to do for N <= 0
        xor     eax, eax                        # rax = i = 0
        vpcmpeqd ymm0, ymm0, ymm0               # all-ones template for the gather masks
        .p2align 4
1:
        # Four groups of four 32-bit indices: idx[i .. i+15]
        vmovups xmm1, [rsi + rax * 4]
        vmovups xmm2, [rsi + rax * 4 + 16]
        vmovups xmm3, [rsi + rax * 4 + 32]
        vmovups xmm4, [rsi + rax * 4 + 48]
        # A gather clears its mask register, so the masks are rebuilt
        # from the template on every iteration.
        vmovdqa ymm5, ymm0
        vmovdqa ymm6, ymm0
        vmovdqa ymm7, ymm0
        vmovdqa ymm8, ymm0
        # A gather merges into its destination; zeroing first removes the
        # dependency on the values loaded by the previous iteration.
        vxorpd  ymm9,  ymm9,  ymm9
        vxorpd  ymm10, ymm10, ymm10
        vxorpd  ymm11, ymm11, ymm11
        vxorpd  ymm12, ymm12, ymm12
        vgatherdpd ymm9,  [rdi + xmm1 * 8], ymm5
        vgatherdpd ymm10, [rdi + xmm2 * 8], ymm6
        vgatherdpd ymm11, [rdi + xmm3 * 8], ymm7
        vgatherdpd ymm12, [rdi + xmm4 * 8], ymm8
#ifdef TEST
        # Unaligned stores: t is not required to be 32-byte aligned.
        vmovupd [rcx + rax * 8],      ymm9
        vmovupd [rcx + rax * 8 + 32], ymm10
        vmovupd [rcx + rax * 8 + 64], ymm11
        vmovupd [rcx + rax * 8 + 96], ymm12
#endif
        add     rax, 16
        cmp     rax, rdx
        jl      1b
        vzeroupper                              # avoid AVX/SSE transition stalls in the caller
2:
        ret
.size gather, .-gather

# Tell the linker this object does not need an executable stack.
.section .note.GNU-stack, "", @progbits

View File

@@ -0,0 +1,71 @@
.intel_syntax noprefix

# -----------------------------------------------------------------------
# gather_aos -- AVX2 gather kernel for records of three doubles stored
#               back to back (array of structures)
#
# ABI:    System V AMD64, leaf function, no stack usage
# In:     rdi = a    base of the record array
#         rsi = idx  32-bit record indices
#         rdx = N    number of indices, read as a signed 64-bit value
#         rcx = t    output, only written when preprocessed with TEST
# Out:    no defined return value
# Clobb:  rax, ymm0-ymm8, flags, plus r8 and r9 in TEST builds;
#         vzeroupper clears the upper halves of every ymm register
#
# Record stride is 3 doubles, or 4 doubles when PADDING is defined.
# Component k of record idx[i] is read from a[stride * idx[i] + k].
# TEST builds store component k of element i to t[k * N + i].
# Four indices are consumed per iteration, so the trip count is N
# rounded up to a multiple of 4. N <= 0 returns at once.
# -----------------------------------------------------------------------
.text
.globl gather_aos
.type gather_aos, @function
gather_aos:
        test    rdx, rdx
        jle     2f                              # nothing to do for N <= 0
        xor     eax, eax                        # rax = i = 0
        vpcmpeqd ymm8, ymm8, ymm8               # all-ones template for the gather masks
#ifdef TEST
        # Loop-invariant bases of the second and third output planes.
        lea     r8, [rcx + rdx * 8]             # r8 = t + N
        lea     r9, [r8 + rdx * 8]              # r9 = t + 2 * N
#endif
        .p2align 4
1:
        vmovups xmm3, XMMWORD PTR [rsi + rax * 4]   # idx[i .. i+3]
        vpaddd  xmm4, xmm3, xmm3                # xmm4 = 2 * idx
#ifdef PADDING
        vpaddd  xmm3, xmm4, xmm4                # xmm3 = 4 * idx (padded record)
#else
        vpaddd  xmm3, xmm3, xmm4                # xmm3 = 3 * idx (packed record)
#endif
        # A gather clears its mask register, so the masks are rebuilt
        # from the template on every iteration.
        vmovdqa ymm5, ymm8
        vmovdqa ymm6, ymm8
        vmovdqa ymm7, ymm8
        # A gather merges into its destination; zeroing first removes the
        # dependency on the values loaded by the previous iteration.
        vxorpd  ymm0, ymm0, ymm0
        vxorpd  ymm1, ymm1, ymm1
        vxorpd  ymm2, ymm2, ymm2
        # Same scaled index for all three, the displacement picks the
        # component inside the record.
        vgatherdpd ymm0, [     rdi + xmm3 * 8], ymm5
        vgatherdpd ymm1, [8  + rdi + xmm3 * 8], ymm6
        vgatherdpd ymm2, [16 + rdi + xmm3 * 8], ymm7
#ifdef TEST
        vmovupd [rcx + rax * 8], ymm0
        vmovupd [r8  + rax * 8], ymm1
        vmovupd [r9  + rax * 8], ymm2
#endif
        add     rax, 4
        cmp     rax, rdx
        jl      1b
        vzeroupper                              # avoid AVX/SSE transition stalls in the caller
2:
        ret
.size gather_aos, .-gather_aos

# Tell the linker this object does not need an executable stack.
.section .note.GNU-stack, "", @progbits

View File

@@ -0,0 +1,67 @@
.intel_syntax noprefix

# -----------------------------------------------------------------------
# gather_soa -- AVX2 gather kernel for three component planes stored one
#               after another (structure of arrays)
#
# ABI:    System V AMD64, leaf function, no stack usage
# In:     rdi = a    base of the first plane; plane k starts at a + k * N
#         rsi = idx  32-bit element indices, applied to every plane
#         rdx = N    number of indices and also the plane length,
#                    read as a signed 64-bit value
#         rcx = t    output, only written when preprocessed with TEST
# Out:    no defined return value
# Clobb:  rax, r8, r9, ymm0-ymm3, ymm5-ymm8, flags, plus r10 and r11 in
#         TEST builds; vzeroupper clears the upper halves of every ymm
#         register
#
# Component k of element i is read from a[k * N + idx[i]].
# TEST builds store it to t[k * N + i].
# Four indices are consumed per iteration, so the trip count is N
# rounded up to a multiple of 4. N <= 0 returns at once.
# -----------------------------------------------------------------------
.text
.globl gather_soa
.type gather_soa, @function
gather_soa:
        test    rdx, rdx
        jle     2f                              # nothing to do for N <= 0
        xor     eax, eax                        # rax = i = 0
        vpcmpeqd ymm8, ymm8, ymm8               # all-ones template for the gather masks
        lea     r8, [rdi + rdx * 8]             # r8 = a + N      (second input plane)
        lea     r9, [r8 + rdx * 8]              # r9 = a + 2 * N  (third input plane)
#ifdef TEST
        # Loop-invariant bases of the second and third output planes.
        lea     r10, [rcx + rdx * 8]            # r10 = t + N
        lea     r11, [r10 + rdx * 8]            # r11 = t + 2 * N
#endif
        .p2align 4
1:
        vmovups xmm3, XMMWORD PTR [rsi + rax * 4]   # idx[i .. i+3]
        # A gather clears its mask register, so the masks are rebuilt
        # from the template on every iteration.
        vmovdqa ymm5, ymm8
        vmovdqa ymm6, ymm8
        vmovdqa ymm7, ymm8
        # A gather merges into its destination; zeroing first removes the
        # dependency on the values loaded by the previous iteration.
        vxorpd  ymm0, ymm0, ymm0
        vxorpd  ymm1, ymm1, ymm1
        vxorpd  ymm2, ymm2, ymm2
        # Same indices for all three, the base register picks the plane.
        vgatherdpd ymm0, [rdi + xmm3 * 8], ymm5
        vgatherdpd ymm1, [r8  + xmm3 * 8], ymm6
        vgatherdpd ymm2, [r9  + xmm3 * 8], ymm7
#ifdef TEST
        vmovupd [rcx + rax * 8], ymm0
        vmovupd [r10 + rax * 8], ymm1
        vmovupd [r11 + rax * 8], ymm2
#endif
        add     rax, 4
        cmp     rax, rdx
        jl      1b
        vzeroupper                              # avoid AVX/SSE transition stalls in the caller
2:
        ret
.size gather_soa, .-gather_soa

# Tell the linker this object does not need an executable stack.
.section .note.GNU-stack, "", @progbits