Cleanup and move gather-bench to util folder
This commit is contained in:
52
util/gather-bench/.gitignore
vendored
Normal file
52
util/gather-bench/.gitignore
vendored
Normal file
@@ -0,0 +1,52 @@
|
||||
# Prerequisites
|
||||
*.d
|
||||
|
||||
# Object files
|
||||
*.o
|
||||
*.ko
|
||||
*.obj
|
||||
*.elf
|
||||
|
||||
# Linker output
|
||||
*.ilk
|
||||
*.map
|
||||
*.exp
|
||||
|
||||
# Precompiled Headers
|
||||
*.gch
|
||||
*.pch
|
||||
|
||||
# Libraries
|
||||
*.lib
|
||||
*.a
|
||||
*.la
|
||||
*.lo
|
||||
|
||||
# Shared objects (inc. Windows DLLs)
|
||||
*.dll
|
||||
*.so
|
||||
*.so.*
|
||||
*.dylib
|
||||
|
||||
# Executables
|
||||
*.exe
|
||||
*.out
|
||||
*.app
|
||||
*.i*86
|
||||
*.x86_64
|
||||
*.hex
|
||||
|
||||
# Debug files
|
||||
*.dSYM/
|
||||
*.su
|
||||
*.idb
|
||||
*.pdb
|
||||
|
||||
# Kernel Module Compile Results
|
||||
*.mod*
|
||||
*.cmd
|
||||
.tmp_versions/
|
||||
modules.order
|
||||
Module.symvers
|
||||
Mkfile.old
|
||||
dkms.conf
|
21
util/gather-bench/LICENSE
Normal file
21
util/gather-bench/LICENSE
Normal file
@@ -0,0 +1,21 @@
|
||||
MIT License
|
||||
|
||||
Copyright (c) 2021 RRZE-HPC
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
126
util/gather-bench/Makefile
Normal file
126
util/gather-bench/Makefile
Normal file
@@ -0,0 +1,126 @@
|
||||
#CONFIGURE BUILD SYSTEM
|
||||
TARGET = gather-bench-$(TAG)
|
||||
BUILD_DIR = ./$(TAG)
|
||||
SRC_DIR = ./src
|
||||
MAKE_DIR = ./
|
||||
ISA_DIR = ./src/$(ISA)
|
||||
Q ?= @
|
||||
|
||||
#DO NOT EDIT BELOW
|
||||
include $(MAKE_DIR)/config.mk
|
||||
include $(MAKE_DIR)/include_$(TAG).mk
|
||||
include $(MAKE_DIR)/include_LIKWID.mk
|
||||
INCLUDES += -I./src/includes
|
||||
|
||||
VPATH = $(SRC_DIR) ${ISA_DIR}
|
||||
# Source discovery. Simple (:=) assignment is used so that every
# $(wildcard ...) runs exactly once at parse time instead of on each expansion;
# SRC_DIR, BUILD_DIR and ISA_DIR are fully defined once config.mk has been
# included above.

# Assembly listings that the `asm` goal can generate from C/Fortran sources.
ASM := $(patsubst $(SRC_DIR)/%.c,$(BUILD_DIR)/%.s,$(wildcard $(SRC_DIR)/*.c))
ASM += $(patsubst $(SRC_DIR)/%.f90,$(BUILD_DIR)/%.s,$(wildcard $(SRC_DIR)/*.f90))

# Objects to link. main*.c is filtered out because the link rules compile the
# selected main file directly.
OBJ := $(filter-out $(BUILD_DIR)/main%,$(patsubst $(SRC_DIR)/%.c,$(BUILD_DIR)/%.o,$(wildcard $(SRC_DIR)/*.c)))
OBJ += $(patsubst $(SRC_DIR)/%.cc,$(BUILD_DIR)/%.o,$(wildcard $(SRC_DIR)/*.cc))
OBJ += $(patsubst $(SRC_DIR)/%.cpp,$(BUILD_DIR)/%.o,$(wildcard $(SRC_DIR)/*.cpp))
OBJ += $(patsubst $(SRC_DIR)/%.f90,$(BUILD_DIR)/%.o,$(wildcard $(SRC_DIR)/*.f90))
OBJ += $(patsubst $(SRC_DIR)/%.F90,$(BUILD_DIR)/%.o,$(wildcard $(SRC_DIR)/*.F90))
OBJ += $(patsubst $(SRC_DIR)/%.s,$(BUILD_DIR)/%.o,$(wildcard $(SRC_DIR)/*.s))
# ISA-specific kernels (preprocessed assembly) live in src/$(ISA).
OBJ += $(patsubst $(ISA_DIR)/%.S,$(BUILD_DIR)/%.o,$(wildcard $(ISA_DIR)/*.S))
|
||||
CPPFLAGS := $(CPPFLAGS) $(DEFINES) $(INCLUDES) -DISA_$(ISA)
|
||||
|
||||
ifneq ($(VARIANT),)
|
||||
.DEFAULT_GOAL := ${TARGET}-$(VARIANT)
|
||||
endif
|
||||
|
||||
ifeq ($(strip $(DATA_LAYOUT)),AOS)
|
||||
CPPFLAGS += -DAOS
|
||||
endif
|
||||
|
||||
ifeq ($(strip $(TEST)),true)
|
||||
CPPFLAGS += -DTEST
|
||||
endif
|
||||
|
||||
ifeq ($(strip $(PADDING)),true)
|
||||
CPPFLAGS += -DPADDING
|
||||
endif
|
||||
|
||||
ifeq ($(strip $(MEASURE_GATHER_CYCLES)),true)
|
||||
CPPFLAGS += -DMEASURE_GATHER_CYCLES
|
||||
endif
|
||||
|
||||
ifeq ($(strip $(ONLY_FIRST_DIMENSION)),true)
|
||||
CPPFLAGS += -DONLY_FIRST_DIMENSION
|
||||
endif
|
||||
|
||||
ifeq ($(strip $(MEM_TRACER)),true)
|
||||
CPPFLAGS += -DMEM_TRACER
|
||||
endif
|
||||
|
||||
# Default binary: src/main.c is compiled and linked in a single step together
# with the prebuilt objects.
${TARGET}: $(BUILD_DIR) $(OBJ) $(SRC_DIR)/main.c
	@echo "===> LINKING $(TARGET)"
	$(Q)${LINKER} ${CPPFLAGS} ${LFLAGS} -o $@ $(SRC_DIR)/main.c $(OBJ) $(LIBS)
|
||||
|
||||
# Variant binary: the stem selects src/main-<variant>.c as the entry point,
# which is compiled and linked together with the prebuilt objects.
${TARGET}-%: $(BUILD_DIR) $(OBJ) $(SRC_DIR)/main-%.c
	@echo "===> LINKING $(TARGET)-$* "
	$(Q)${LINKER} ${CPPFLAGS} ${LFLAGS} -o $@ $(SRC_DIR)/main-$*.c $(OBJ) $(LIBS)
|
||||
|
||||
asm: $(BUILD_DIR) $(ASM)
|
||||
|
||||
# Compile a C source (found through VPATH) and write its header dependency
# file next to the object. The build directory is an order-only prerequisite
# so it exists before compilation, also under `make -j`, without its
# timestamp ever forcing a rebuild.
$(BUILD_DIR)/%.o: %.c | $(BUILD_DIR)
	@echo "===> COMPILE $@"
	$(Q)$(CC) -c $(CPPFLAGS) $(CFLAGS) $< -o $@
	$(Q)$(CC) $(CPPFLAGS) -MT $@ -MM $< > $(@:.o=.d)
|
||||
|
||||
# Emit the compiler-generated assembly listing for a C source. The build
# directory is order-only so it is created first, also under `make -j`.
$(BUILD_DIR)/%.s: %.c | $(BUILD_DIR)
	@echo "===> GENERATE ASM $@"
	$(Q)$(CC) -S $(CPPFLAGS) $(CFLAGS) $< -o $@
|
||||
|
||||
# Emit the compiler-generated assembly listing for a Fortran source. The build
# directory is order-only so it is created first, also under `make -j`.
$(BUILD_DIR)/%.s: %.f90 | $(BUILD_DIR)
	@echo "===> COMPILE $@"
	$(Q)$(FC) -S $(FCFLAGS) $< -o $@
|
||||
|
||||
# Compile a C++ (.cc) source and write its header dependency file next to the
# object. The build directory is order-only so it is created first, also
# under `make -j`.
$(BUILD_DIR)/%.o: %.cc | $(BUILD_DIR)
	@echo "===> COMPILE $@"
	$(Q)$(CXX) -c $(CPPFLAGS) $(CXXFLAGS) $< -o $@
	$(Q)$(CXX) $(CPPFLAGS) -MT $@ -MM $< > $(@:.o=.d)
|
||||
|
||||
# Compile a C++ (.cpp) source and write its header dependency file next to the
# object. The build directory is order-only so it is created first, also
# under `make -j`.
$(BUILD_DIR)/%.o: %.cpp | $(BUILD_DIR)
	@echo "===> COMPILE $@"
	$(Q)$(CXX) -c $(CPPFLAGS) $(CXXFLAGS) $< -o $@
	$(Q)$(CXX) $(CPPFLAGS) -MT $@ -MM $< > $(@:.o=.d)
|
||||
|
||||
# Compile a free-form Fortran source (no preprocessing). The build directory
# is order-only so it is created first, also under `make -j`.
$(BUILD_DIR)/%.o: %.f90 | $(BUILD_DIR)
	@echo "===> COMPILE $@"
	$(Q)$(FC) -c $(FCFLAGS) $< -o $@
|
||||
|
||||
# Compile a preprocessed Fortran source; CPPFLAGS carries the defines and
# include paths. The build directory is order-only so it is created first,
# also under `make -j`.
$(BUILD_DIR)/%.o: %.F90 | $(BUILD_DIR)
	@echo "===> COMPILE $@"
	$(Q)$(FC) -c $(CPPFLAGS) $(FCFLAGS) $< -o $@
|
||||
|
||||
# Assemble a plain (non-preprocessed) assembly source with $(AS). The build
# directory is order-only so it is created first, also under `make -j`.
$(BUILD_DIR)/%.o: %.s | $(BUILD_DIR)
	@echo "===> ASSEMBLE $@"
	$(Q)$(AS) $(ASFLAGS) $< -o $@
|
||||
|
||||
# Assemble a .S kernel through the C compiler driver so that the C
# preprocessor evaluates its #ifdef blocks using CPPFLAGS. The build
# directory is order-only so it is created first, also under `make -j`.
$(BUILD_DIR)/%.o: %.S | $(BUILD_DIR)
	@echo "===> ASSEMBLE $@"
	$(Q)$(CC) -c $(CPPFLAGS) $< -o $@
|
||||
|
||||
tags:
|
||||
@echo "===> GENERATE TAGS"
|
||||
$(Q)ctags -R
|
||||
|
||||
|
||||
# Create the per-toolchain output directory. -p keeps the rule from failing
# when the directory already exists (e.g. created by a concurrent job).
$(BUILD_DIR):
	@mkdir -p $@
|
||||
|
||||
# Load the generated header-dependency files unless a cleaning goal was
# requested. $(filter) matches whole goal names, so both `clean` and
# `distclean` are recognised. The leading '-' tolerates .d files that do not
# exist yet (first build, or objects without a .d file).
ifeq ($(filter clean distclean,$(MAKECMDGOALS)),)
-include $(OBJ:.o=.d)
endif
|
||||
|
||||
# Goals that name actions rather than up-to-date files. `tags` is listed so
# that an existing tags file never prevents `make tags` from regenerating it.
.PHONY: clean distclean asm tags

# Remove the per-toolchain build directory and the ctags index.
clean:
	@echo "===> CLEAN"
	@rm -rf $(BUILD_DIR)
	@rm -f tags

# Everything `clean` removes, plus the linked benchmark binaries: the default
# one and any $(TARGET)-<variant> built through the variant link rule.
distclean: clean
	@echo "===> DIST CLEAN"
	@rm -f $(TARGET) $(TARGET)-*
|
2
util/gather-bench/README.md
Normal file
2
util/gather-bench/README.md
Normal file
@@ -0,0 +1,2 @@
|
||||
# gather-bench
|
||||
An x86 gather instruction performance benchmark
|
22
util/gather-bench/config.mk
Normal file
22
util/gather-bench/config.mk
Normal file
@@ -0,0 +1,22 @@
|
||||
# Supported: GCC, CLANG, ICC
|
||||
TAG ?= ICC
|
||||
# Supported: avx2, avx512
|
||||
ISA ?= avx512
|
||||
# Use likwid?
|
||||
ENABLE_LIKWID ?= false
|
||||
|
||||
# SP or DP
|
||||
DATA_TYPE ?= DP
|
||||
# AOS or SOA
|
||||
DATA_LAYOUT ?= AOS
|
||||
# Pad each AoS element from 3 to 4 doubles (true or false)
|
||||
PADDING ?= false
|
||||
# Measure cycles for each gather separately
|
||||
MEASURE_GATHER_CYCLES ?= false
|
||||
# Gather data only for first dimension (one gather per iteration)
|
||||
ONLY_FIRST_DIMENSION ?= false
|
||||
|
||||
# Trace memory addresses for cache simulator
|
||||
MEM_TRACER ?= false
|
||||
# Test correctness of gather kernels
|
||||
TEST ?= false
|
9
util/gather-bench/include_CLANG.mk
Normal file
9
util/gather-bench/include_CLANG.mk
Normal file
@@ -0,0 +1,9 @@
|
||||
CC = clang
|
||||
LINKER = $(CC)
|
||||
|
||||
OPENMP =# -fopenmp
|
||||
CFLAGS = -Ofast -std=c11 -march=core-avx2 -mavx -mfma $(OPENMP)
|
||||
LFLAGS = $(OPENMP) -march=core-avx2 -mavx -mfma
|
||||
DEFINES = -D_GNU_SOURCE
|
||||
INCLUDES =
|
||||
LIBS =
|
11
util/gather-bench/include_GCC.mk
Normal file
11
util/gather-bench/include_GCC.mk
Normal file
@@ -0,0 +1,11 @@
|
||||
CC = gcc
|
||||
AS = as
|
||||
LINKER = $(CC)
|
||||
|
||||
OPENMP = -fopenmp
|
||||
CFLAGS = -Ofast -std=c11 -mavx2 -mfma $(OPENMP)
|
||||
ASFLAGS =
|
||||
LFLAGS = $(OPENMP) -mavx2 -mfma
|
||||
DEFINES = -D_GNU_SOURCE
|
||||
INCLUDES =
|
||||
LIBS =
|
9
util/gather-bench/include_ICC.mk
Normal file
9
util/gather-bench/include_ICC.mk
Normal file
@@ -0,0 +1,9 @@
|
||||
CC = icc
|
||||
LINKER = $(CC)
|
||||
|
||||
OPENMP = -qopenmp
|
||||
CFLAGS = -Ofast -xhost -std=c11 $(OPENMP)
|
||||
LFLAGS = $(OPENMP)
|
||||
DEFINES = -D_GNU_SOURCE
|
||||
INCLUDES =
|
||||
LIBS =
|
10
util/gather-bench/include_LIKWID.mk
Normal file
10
util/gather-bench/include_LIKWID.mk
Normal file
@@ -0,0 +1,10 @@
|
||||
LIKWID_INC ?= -I/usr/local/include
|
||||
LIKWID_DEFINES ?= -DLIKWID_PERFMON
|
||||
LIKWID_LIB ?= -L/usr/local/lib
|
||||
|
||||
ifeq ($(strip $(ENABLE_LIKWID)),true)
|
||||
INCLUDES += ${LIKWID_INC}
|
||||
DEFINES += ${LIKWID_DEFINES}
|
||||
LIBS += -llikwid
|
||||
LFLAGS += ${LIKWID_LIB}
|
||||
endif
|
57
util/gather-bench/src/allocate.c
Normal file
57
util/gather-bench/src/allocate.c
Normal file
@@ -0,0 +1,57 @@
|
||||
/*
|
||||
* =======================================================================================
|
||||
*
|
||||
* Author: Jan Eitzinger (je), jan.eitzinger@fau.de
|
||||
* Copyright (c) 2020 RRZE, University Erlangen-Nuremberg
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in all
|
||||
* copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
* =======================================================================================
|
||||
*/
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <errno.h>
|
||||
|
||||
/*
 * Allocate `bytesize` bytes whose address is a multiple of `alignment`,
 * using posix_memalign().
 *
 * alignment: requested alignment in bytes; posix_memalign() requires a power
 *            of two that is also a multiple of sizeof(void*).
 * bytesize:  number of bytes to allocate.
 *
 * Returns the allocated pointer, to be released with free(). The function
 * never returns on failure: it prints a diagnostic to stderr and terminates
 * the process with EXIT_FAILURE.
 */
void* allocate (int alignment, size_t bytesize)
{
    /* Start from NULL: posix_memalign() leaves the pointer unspecified on
     * failure, and it is inspected below. */
    void* ptr = NULL;
    int errorCode;

    /* A non-positive alignment can never be valid; report it the same way as
     * EINVAL instead of relying on the implicit conversion to size_t. */
    if (alignment <= 0) {
        errorCode = EINVAL;
    } else {
        errorCode = posix_memalign(&ptr, (size_t) alignment, bytesize);
    }

    if (errorCode == EINVAL) {
        fprintf(stderr,
                "Error: Alignment parameter is not a power of two\n");
        exit(EXIT_FAILURE);
    }

    if (errorCode == ENOMEM) {
        fprintf(stderr,
                "Error: Insufficient memory to fulfill the request\n");
        exit(EXIT_FAILURE);
    }

    /* Any other error code, or a NULL result, is also a failed allocation. */
    if (errorCode != 0 || ptr == NULL) {
        fprintf(stderr, "Error: posix_memalign failed!\n");
        exit(EXIT_FAILURE);
    }

    return ptr;
}
|
63
util/gather-bench/src/avx2/gather.S
Normal file
63
util/gather-bench/src/avx2/gather.S
Normal file
@@ -0,0 +1,63 @@
|
||||
.intel_syntax noprefix
|
||||
.data
|
||||
.align 64
|
||||
SCALAR:
|
||||
.double 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0
|
||||
|
||||
# rdi -> a
|
||||
# rsi -> idx
|
||||
# rdx -> N
|
||||
# rcx -> t
|
||||
.text
|
||||
.globl gather
|
||||
.type gather, @function
|
||||
gather :
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
push rbx
|
||||
push r12
|
||||
push r13
|
||||
push r14
|
||||
push r15
|
||||
|
||||
xor rax, rax
|
||||
vpcmpeqd ymm0, ymm0, ymm0
|
||||
.align 16
|
||||
1:
|
||||
vmovups xmm1, [rsi + rax * 4]
|
||||
vmovups xmm2, [rsi + rax * 4 + 16]
|
||||
vmovups xmm3, [rsi + rax * 4 + 32]
|
||||
vmovups xmm4, [rsi + rax * 4 + 48]
|
||||
vmovdqa ymm5, ymm0
|
||||
vmovdqa ymm6, ymm0
|
||||
vmovdqa ymm7, ymm0
|
||||
vmovdqa ymm8, ymm0
|
||||
vxorpd ymm9, ymm9, ymm9
|
||||
vxorpd ymm10, ymm10, ymm10
|
||||
vxorpd ymm11, ymm11, ymm11
|
||||
vxorpd ymm12, ymm12, ymm12
|
||||
vgatherdpd ymm9, [rdi + xmm1 * 8], ymm5
|
||||
vgatherdpd ymm10, [rdi + xmm2 * 8], ymm6
|
||||
vgatherdpd ymm11, [rdi + xmm3 * 8], ymm7
|
||||
vgatherdpd ymm12, [rdi + xmm4 * 8], ymm8
|
||||
|
||||
#ifdef TEST
|
||||
vmovapd [rcx + rax * 8], ymm9
|
||||
vmovapd [rcx + rax * 8 + 32], ymm10
|
||||
vmovapd [rcx + rax * 8 + 64], ymm11
|
||||
vmovapd [rcx + rax * 8 + 96], ymm12
|
||||
#endif
|
||||
|
||||
addq rax, 16
|
||||
cmpq rax, rdx
|
||||
jl 1b
|
||||
|
||||
pop r15
|
||||
pop r14
|
||||
pop r13
|
||||
pop r12
|
||||
pop rbx
|
||||
mov rsp, rbp
|
||||
pop rbp
|
||||
ret
|
||||
.size gather, .-gather
|
71
util/gather-bench/src/avx2/gather_aos.S
Normal file
71
util/gather-bench/src/avx2/gather_aos.S
Normal file
@@ -0,0 +1,71 @@
|
||||
.intel_syntax noprefix
|
||||
.data
|
||||
.align 64
|
||||
SCALAR:
|
||||
.double 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0
|
||||
|
||||
# rdi -> a
|
||||
# rsi -> idx
|
||||
# rdx -> N
|
||||
# rcx -> t
|
||||
.text
|
||||
.globl gather_aos
|
||||
.type gather_aos, @function
|
||||
gather_aos :
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
push rbx
|
||||
push r9
|
||||
push r10
|
||||
push r11
|
||||
push r12
|
||||
push r13
|
||||
push r14
|
||||
push r15
|
||||
|
||||
xor rax, rax
|
||||
vpcmpeqd ymm8, ymm8, ymm8
|
||||
.align 16
|
||||
1:
|
||||
|
||||
vmovups xmm3, XMMWORD PTR [rsi + rax * 4]
|
||||
vpaddd xmm4, xmm3, xmm3
|
||||
#ifdef PADDING
|
||||
vpaddd xmm3, xmm4, xmm4
|
||||
#else
|
||||
vpaddd xmm3, xmm3, xmm4
|
||||
#endif
|
||||
vmovdqa ymm5, ymm8
|
||||
vmovdqa ymm6, ymm8
|
||||
vmovdqa ymm7, ymm8
|
||||
vxorpd ymm0, ymm0, ymm0
|
||||
vxorpd ymm1, ymm1, ymm1
|
||||
vxorpd ymm2, ymm2, ymm2
|
||||
vgatherdpd ymm0, [ rdi + xmm3 * 8], ymm5
|
||||
vgatherdpd ymm1, [8 + rdi + xmm3 * 8], ymm6
|
||||
vgatherdpd ymm2, [16 + rdi + xmm3 * 8], ymm7
|
||||
|
||||
#ifdef TEST
|
||||
vmovupd [rcx + rax * 8], ymm0
|
||||
lea rbx, [rcx + rdx * 8]
|
||||
vmovupd [rbx + rax * 8], ymm1
|
||||
lea r9, [rbx + rdx * 8]
|
||||
vmovupd [r9 + rax * 8], ymm2
|
||||
#endif
|
||||
|
||||
addq rax, 4
|
||||
cmpq rax, rdx
|
||||
jl 1b
|
||||
|
||||
pop r15
|
||||
pop r14
|
||||
pop r13
|
||||
pop r12
|
||||
pop r11
|
||||
pop r10
|
||||
pop r9
|
||||
pop rbx
|
||||
mov rsp, rbp
|
||||
pop rbp
|
||||
ret
|
||||
.size gather_aos, .-gather_aos
|
67
util/gather-bench/src/avx2/gather_soa.S
Normal file
67
util/gather-bench/src/avx2/gather_soa.S
Normal file
@@ -0,0 +1,67 @@
|
||||
.intel_syntax noprefix
|
||||
.data
|
||||
.align 64
|
||||
SCALAR:
|
||||
.double 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0
|
||||
|
||||
# rdi -> a
|
||||
# rsi -> idx
|
||||
# rdx -> N
|
||||
# rcx -> t
|
||||
.text
|
||||
.globl gather_soa
|
||||
.type gather_soa, @function
|
||||
gather_soa :
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
push rbx
|
||||
push r9
|
||||
push r10
|
||||
push r11
|
||||
push r12
|
||||
push r13
|
||||
push r14
|
||||
push r15
|
||||
|
||||
xor rax, rax
|
||||
vpcmpeqd ymm8, ymm8, ymm8
|
||||
lea r8, [rdi + rdx * 8]
|
||||
lea r9, [r8 + rdx * 8]
|
||||
.align 16
|
||||
1:
|
||||
|
||||
vmovups xmm3, XMMWORD PTR [rsi + rax * 4]
|
||||
vmovdqa ymm5, ymm8
|
||||
vmovdqa ymm6, ymm8
|
||||
vmovdqa ymm7, ymm8
|
||||
vxorpd ymm0, ymm0, ymm0
|
||||
vxorpd ymm1, ymm1, ymm1
|
||||
vxorpd ymm2, ymm2, ymm2
|
||||
vgatherdpd ymm0, [rdi + xmm3 * 8], ymm5
|
||||
vgatherdpd ymm1, [r8 + xmm3 * 8], ymm6
|
||||
vgatherdpd ymm2, [r9 + xmm3 * 8], ymm7
|
||||
|
||||
#ifdef TEST
|
||||
vmovupd [rcx + rax * 8], ymm0
|
||||
lea rbx, [rcx + rdx * 8]
|
||||
vmovupd [rbx + rax * 8], ymm1
|
||||
lea r10, [rbx + rdx * 8]
|
||||
vmovupd [r10 + rax * 8], ymm2
|
||||
#endif
|
||||
|
||||
addq rax, 4
|
||||
cmpq rax, rdx
|
||||
jl 1b
|
||||
|
||||
pop r15
|
||||
pop r14
|
||||
pop r13
|
||||
pop r12
|
||||
pop r11
|
||||
pop r10
|
||||
pop r9
|
||||
pop rbx
|
||||
mov rsp, rbp
|
||||
pop rbp
|
||||
ret
|
||||
.size gather_soa, .-gather_soa
|
62
util/gather-bench/src/avx512/gather.S
Normal file
62
util/gather-bench/src/avx512/gather.S
Normal file
@@ -0,0 +1,62 @@
|
||||
.intel_syntax noprefix
|
||||
.data
|
||||
.align 64
|
||||
SCALAR:
|
||||
.double 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0
|
||||
|
||||
# rdi -> a
|
||||
# rsi -> idx
|
||||
# rdx -> N
|
||||
# rcx -> t
|
||||
.text
|
||||
.globl gather
|
||||
.type gather, @function
|
||||
gather :
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
push rbx
|
||||
push r12
|
||||
push r13
|
||||
push r14
|
||||
push r15
|
||||
|
||||
xor rax, rax
|
||||
.align 16
|
||||
1:
|
||||
vpcmpeqb k1, xmm0, xmm0
|
||||
vpcmpeqb k2, xmm0, xmm0
|
||||
vpcmpeqb k3, xmm0, xmm0
|
||||
vpcmpeqb k4, xmm0, xmm0
|
||||
vmovdqu ymm0, [rsi + rax * 4]
|
||||
vmovdqu ymm1, [rsi + rax * 4 + 32]
|
||||
vmovdqu ymm2, [rsi + rax * 4 + 64]
|
||||
vmovdqu ymm3, [rsi + rax * 4 + 96]
|
||||
vpxord zmm4, zmm4, zmm4
|
||||
vpxord zmm5, zmm5, zmm5
|
||||
vpxord zmm6, zmm6, zmm6
|
||||
vpxord zmm7, zmm7, zmm7
|
||||
vgatherdpd zmm4{k1}, [rdi + ymm0 * 8]
|
||||
vgatherdpd zmm5{k2}, [rdi + ymm1 * 8]
|
||||
vgatherdpd zmm6{k3}, [rdi + ymm2 * 8]
|
||||
vgatherdpd zmm7{k4}, [rdi + ymm3 * 8]
|
||||
|
||||
#ifdef TEST
|
||||
vmovapd [rcx + rax * 8], zmm4
|
||||
vmovapd [rcx + rax * 8 + 64], zmm5
|
||||
vmovapd [rcx + rax * 8 + 128], zmm6
|
||||
vmovapd [rcx + rax * 8 + 192], zmm7
|
||||
#endif
|
||||
|
||||
addq rax, 32
|
||||
cmpq rax, rdx
|
||||
jl 1b
|
||||
|
||||
pop r15
|
||||
pop r14
|
||||
pop r13
|
||||
pop r12
|
||||
pop rbx
|
||||
mov rsp, rbp
|
||||
pop rbp
|
||||
ret
|
||||
.size gather, .-gather
|
151
util/gather-bench/src/avx512/gather_aos.S
Normal file
151
util/gather-bench/src/avx512/gather_aos.S
Normal file
@@ -0,0 +1,151 @@
|
||||
.intel_syntax noprefix
|
||||
.data
|
||||
.align 64
|
||||
SCALAR:
|
||||
.double 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0
|
||||
|
||||
# rdi -> a
|
||||
# rsi -> idx
|
||||
# rdx -> N
|
||||
# rcx -> t
|
||||
# r8 -> cycles
|
||||
.text
|
||||
.globl gather_aos
|
||||
.type gather_aos, @function
|
||||
gather_aos :
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
push rbx
|
||||
push r9
|
||||
push r10
|
||||
push r11
|
||||
push r12
|
||||
push r13
|
||||
push r14
|
||||
push r15
|
||||
|
||||
xor rax, rax
|
||||
.align 16
|
||||
1:
|
||||
|
||||
vmovdqu ymm3, YMMWORD PTR [rsi + rax * 4]
|
||||
vpaddd ymm4, ymm3, ymm3
|
||||
#ifdef PADDING
|
||||
vpaddd ymm3, ymm4, ymm4
|
||||
#else
|
||||
vpaddd ymm3, ymm3, ymm4
|
||||
#endif
|
||||
|
||||
# Prefetching instructions
|
||||
#mov ebx, DWORD PTR[rsi + rax*4]
|
||||
#mov r9d, DWORD PTR[4 + rsi + rax*4]
|
||||
#mov r10d, DWORD PTR[8 + rsi + rax*4]
|
||||
#mov r11d, DWORD PTR[12 + rsi + rax*4]
|
||||
#mov r12d, DWORD PTR[16 + rsi + rax*4]
|
||||
#mov r13d, DWORD PTR[20 + rsi + rax*4]
|
||||
#mov r14d, DWORD PTR[24 + rsi + rax*4]
|
||||
#mov r15d, DWORD PTR[28 + rsi + rax*4]
|
||||
#lea ebx, DWORD PTR[rbx]
|
||||
#lea r9d, DWORD PTR[r9]
|
||||
#lea r10d, DWORD PTR[r10]
|
||||
#lea r11d, DWORD PTR[r11]
|
||||
#lea r12d, DWORD PTR[r12]
|
||||
#lea r13d, DWORD PTR[r13]
|
||||
#lea r14d, DWORD PTR[r14]
|
||||
#lea r15d, DWORD PTR[r15]
|
||||
|
||||
vpcmpeqb k1, xmm5, xmm5
|
||||
#ifndef ONLY_FIRST_DIMENSION
|
||||
vpcmpeqb k2, xmm5, xmm5
|
||||
vpcmpeqb k3, xmm5, xmm5
|
||||
#endif
|
||||
|
||||
vpxord zmm0, zmm0, zmm0
|
||||
#ifndef ONLY_FIRST_DIMENSION
|
||||
vpxord zmm1, zmm1, zmm1
|
||||
vpxord zmm2, zmm2, zmm2
|
||||
#endif
|
||||
|
||||
#ifdef MEASURE_GATHER_CYCLES
|
||||
|
||||
mov r9, rax
|
||||
mov r10, rdx
|
||||
xor r11, r11
|
||||
add r11, rax
|
||||
add r11, rax
|
||||
add r11, rax
|
||||
#shr r11, 3
|
||||
|
||||
xor rbx, rbx
|
||||
lfence
|
||||
rdtsc
|
||||
add ebx, eax
|
||||
vgatherdpd zmm0{k1}, [rdi + ymm3 * 8]
|
||||
lfence
|
||||
rdtsc
|
||||
sub eax, ebx
|
||||
#movdiri [r8 + r11], rax
|
||||
movnti [r8 + r11], rax
|
||||
|
||||
#ifndef ONLY_FIRST_DIMENSION
|
||||
xor rbx, rbx
|
||||
lfence
|
||||
rdtsc
|
||||
add ebx, eax
|
||||
vgatherdpd zmm1{k2}, [8 + rdi + ymm3 * 8]
|
||||
lfence
|
||||
rdtsc
|
||||
sub eax, ebx
|
||||
#movdiri [8 + r8 + r11], rax
|
||||
movnti [8 + r8 + r11], rax
|
||||
|
||||
xor rbx, rbx
|
||||
lfence
|
||||
rdtsc
|
||||
add ebx, eax
|
||||
vgatherdpd zmm2{k3}, [16 + rdi + ymm3 * 8]
|
||||
lfence
|
||||
rdtsc
|
||||
sub eax, ebx
|
||||
#movdiri [16 + r8 + r11], rax
|
||||
movnti [16 + r8 + r11], rax
|
||||
#endif // ONLY_FIRST_DIMENSION
|
||||
|
||||
mov rax, r9
|
||||
mov rdx, r10
|
||||
|
||||
#else // MEASURE_GATHER_CYCLES
|
||||
|
||||
vgatherdpd zmm0{k1}, [ rdi + ymm3 * 8]
|
||||
|
||||
#ifndef ONLY_FIRST_DIMENSION
|
||||
vgatherdpd zmm1{k2}, [8 + rdi + ymm3 * 8]
|
||||
vgatherdpd zmm2{k3}, [16 + rdi + ymm3 * 8]
|
||||
#endif
|
||||
|
||||
#endif // MEASURE_GATHER_CYCLES
|
||||
|
||||
#ifdef TEST
|
||||
vmovupd [rcx + rax * 8], zmm0
|
||||
lea rbx, [rcx + rdx * 8]
|
||||
vmovupd [rbx + rax * 8], zmm1
|
||||
lea r9, [rbx + rdx * 8]
|
||||
vmovupd [r9 + rax * 8], zmm2
|
||||
#endif
|
||||
|
||||
addq rax, 8
|
||||
cmpq rax, rdx
|
||||
jl 1b
|
||||
|
||||
pop r15
|
||||
pop r14
|
||||
pop r13
|
||||
pop r12
|
||||
pop r11
|
||||
pop r10
|
||||
pop r9
|
||||
pop rbx
|
||||
mov rsp, rbp
|
||||
pop rbp
|
||||
ret
|
||||
.size gather_aos, .-gather_aos
|
147
util/gather-bench/src/avx512/gather_md_aos.S
Normal file
147
util/gather-bench/src/avx512/gather_md_aos.S
Normal file
@@ -0,0 +1,147 @@
|
||||
.intel_syntax noprefix
|
||||
.data
|
||||
.align 64
|
||||
SCALAR:
|
||||
.double 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0
|
||||
|
||||
.section .rodata, "a"
|
||||
.align 64
|
||||
.align 64
|
||||
.ymm_reg_mask.1:
|
||||
.long 0x00000000,0x00000001,0x00000002,0x00000003,0x00000004,0x00000005,0x00000006,0x00000007
|
||||
.type .ymm_reg_mask.1,@object
|
||||
.size .ymm_reg_mask.1,32
|
||||
.align 8
|
||||
|
||||
# rdi -> a
|
||||
# rsi -> neighbors
|
||||
# rdx -> numneighs[i]
|
||||
# rcx -> &t[t_idx]
|
||||
# r8 -> ntest
|
||||
.text
|
||||
.globl gather_md_aos
|
||||
.type gather_md_aos, @function
|
||||
gather_md_aos :
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
push rbx
|
||||
push r10
|
||||
push r11
|
||||
push r12
|
||||
push r13
|
||||
push r14
|
||||
push r15
|
||||
|
||||
vmovdqu ymm7, YMMWORD PTR .ymm_reg_mask.1[rip]
|
||||
mov r15, rdx
|
||||
xor rax, rax
|
||||
.align 16
|
||||
1:
|
||||
|
||||
vmovdqu ymm3, YMMWORD PTR [rsi + rax * 4]
|
||||
vpaddd ymm4, ymm3, ymm3
|
||||
#ifdef PADDING
|
||||
vpaddd ymm3, ymm4, ymm4
|
||||
#else
|
||||
vpaddd ymm3, ymm3, ymm4
|
||||
#endif
|
||||
|
||||
# Prefetching instructions
|
||||
#mov ebx, DWORD PTR[rsi + rax*4]
|
||||
#mov r9d, DWORD PTR[4 + rsi + rax*4]
|
||||
#mov r10d, DWORD PTR[8 + rsi + rax*4]
|
||||
#mov r11d, DWORD PTR[12 + rsi + rax*4]
|
||||
#mov r12d, DWORD PTR[16 + rsi + rax*4]
|
||||
#mov r13d, DWORD PTR[20 + rsi + rax*4]
|
||||
#mov r14d, DWORD PTR[24 + rsi + rax*4]
|
||||
#mov r15d, DWORD PTR[28 + rsi + rax*4]
|
||||
#lea ebx, DWORD PTR[rbx]
|
||||
#lea r9d, DWORD PTR[r9]
|
||||
#lea r10d, DWORD PTR[r10]
|
||||
#lea r11d, DWORD PTR[r11]
|
||||
#lea r12d, DWORD PTR[r12]
|
||||
#lea r13d, DWORD PTR[r13]
|
||||
#lea r14d, DWORD PTR[r14]
|
||||
#lea r15d, DWORD PTR[r15]
|
||||
|
||||
vpcmpeqb k1, xmm5, xmm5
|
||||
#ifndef ONLY_FIRST_DIMENSION
|
||||
vpcmpeqb k2, xmm5, xmm5
|
||||
vpcmpeqb k3, xmm5, xmm5
|
||||
#endif
|
||||
|
||||
vpxord zmm0, zmm0, zmm0
|
||||
#ifndef ONLY_FIRST_DIMENSION
|
||||
vpxord zmm1, zmm1, zmm1
|
||||
vpxord zmm2, zmm2, zmm2
|
||||
#endif
|
||||
|
||||
vgatherdpd zmm0{k1}, [ rdi + ymm3 * 8]
|
||||
#ifndef ONLY_FIRST_DIMENSION
|
||||
vgatherdpd zmm1{k2}, [8 + rdi + ymm3 * 8]
|
||||
vgatherdpd zmm2{k3}, [16 + rdi + ymm3 * 8]
|
||||
#endif
|
||||
|
||||
#ifdef TEST
|
||||
vmovupd [rcx + rax * 8], zmm0
|
||||
lea rbx, [rcx + r8 * 8]
|
||||
vmovupd [rbx + rax * 8], zmm1
|
||||
lea r10, [rbx + r8 * 8]
|
||||
vmovupd [r10 + rax * 8], zmm2
|
||||
#endif
|
||||
|
||||
# TODO: see if this logic can be optimized
|
||||
addq rax, 8
|
||||
subq r15, 8
|
||||
cmpq r15, 8
|
||||
jge 1b
|
||||
|
||||
cmpq r15, 0
|
||||
jle .end_func
|
||||
|
||||
vpbroadcastd ymm6, r15d
|
||||
vpcmpgtd k1, ymm6, ymm7
|
||||
vmovdqu32 ymm3{k1}{z}, YMMWORD PTR [rsi + rax * 4]
|
||||
vpaddd ymm4, ymm3, ymm3
|
||||
#ifdef PADDING
|
||||
vpaddd ymm3, ymm4, ymm4
|
||||
#else
|
||||
vpaddd ymm3, ymm3, ymm4
|
||||
#endif
|
||||
|
||||
# Zero zmm0 before the masked gather, matching the main loop; lanes that are
# switched off in k1 then hold 0 and zmm0 does not depend on zmm1/zmm2.
vpxord zmm0, zmm0, zmm0
|
||||
#ifndef ONLY_FIRST_DIMENSION
|
||||
kmovw k2, k1
|
||||
kmovw k3, k1
|
||||
vpxord zmm1, zmm1, zmm1
|
||||
vpxord zmm2, zmm2, zmm2
|
||||
#endif
|
||||
|
||||
vgatherdpd zmm0{k1}, [ rdi + ymm3 * 8]
|
||||
#ifndef ONLY_FIRST_DIMENSION
|
||||
vgatherdpd zmm1{k2}, [8 + rdi + ymm3 * 8]
|
||||
vgatherdpd zmm2{k3}, [16 + rdi + ymm3 * 8]
|
||||
#endif
|
||||
|
||||
#ifdef TEST
|
||||
vmovupd [rcx + rax * 8], zmm0
|
||||
lea rbx, [rcx + r8 * 8]
|
||||
vmovupd [rbx + rax * 8], zmm1
|
||||
lea r10, [rbx + r8 * 8]
|
||||
vmovupd [r10 + rax * 8], zmm2
|
||||
#endif
|
||||
|
||||
addq rax, r15
|
||||
|
||||
.end_func:
|
||||
pop r15
|
||||
pop r14
|
||||
pop r13
|
||||
pop r12
|
||||
pop r11
|
||||
pop r10
|
||||
pop rbx
|
||||
mov rsp, rbp
|
||||
pop rbp
|
||||
ret
|
||||
.size gather_md_aos, .-gather_md_aos
|
67
util/gather-bench/src/avx512/gather_soa.S
Normal file
67
util/gather-bench/src/avx512/gather_soa.S
Normal file
@@ -0,0 +1,67 @@
|
||||
.intel_syntax noprefix
|
||||
.data
|
||||
.align 64
|
||||
SCALAR:
|
||||
.double 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0
|
||||
|
||||
# rdi -> a
|
||||
# rsi -> idx
|
||||
# rdx -> N
|
||||
# rcx -> t
|
||||
.text
|
||||
.globl gather_soa
|
||||
.type gather_soa, @function
|
||||
gather_soa :
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
push rbx
|
||||
push r9
|
||||
push r10
|
||||
push r11
|
||||
push r12
|
||||
push r13
|
||||
push r14
|
||||
push r15
|
||||
|
||||
xor rax, rax
|
||||
vpcmpeqd ymm8, ymm8, ymm8
|
||||
lea r8, [rdi + rdx * 8]
|
||||
lea r9, [r8 + rdx * 8]
|
||||
.align 16
|
||||
1:
|
||||
|
||||
vmovdqu ymm3, YMMWORD PTR [rsi + rax * 4]
|
||||
vpcmpeqb k1, xmm5, xmm5
|
||||
vpcmpeqb k2, xmm5, xmm5
|
||||
vpcmpeqb k3, xmm5, xmm5
|
||||
vpxord zmm0, zmm0, zmm0
|
||||
vpxord zmm1, zmm1, zmm1
|
||||
vpxord zmm2, zmm2, zmm2
|
||||
vgatherdpd zmm0{k1}, [rdi + ymm3 * 8]
|
||||
vgatherdpd zmm1{k2}, [r8 + ymm3 * 8]
|
||||
vgatherdpd zmm2{k3}, [r9 + ymm3 * 8]
|
||||
|
||||
#ifdef TEST
|
||||
vmovupd [rcx + rax * 8], zmm0
|
||||
lea rbx, [rcx + rdx * 8]
|
||||
vmovupd [rbx + rax * 8], zmm1
|
||||
lea r10, [rbx + rdx * 8]
|
||||
vmovupd [r10 + rax * 8], zmm2
|
||||
#endif
|
||||
|
||||
addq rax, 8
|
||||
cmpq rax, rdx
|
||||
jl 1b
|
||||
|
||||
pop r15
|
||||
pop r14
|
||||
pop r13
|
||||
pop r12
|
||||
pop r11
|
||||
pop r10
|
||||
pop r9
|
||||
pop rbx
|
||||
mov rsp, rbp
|
||||
pop rbp
|
||||
ret
|
||||
.size gather_soa, .-gather_soa
|
23
util/gather-bench/src/avx512/load_aos.S
Normal file
23
util/gather-bench/src/avx512/load_aos.S
Normal file
@@ -0,0 +1,23 @@
|
||||
.intel_syntax noprefix
|
||||
.data
|
||||
.align 64
|
||||
SCALAR:
|
||||
.double 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0
|
||||
|
||||
# rdi -> &a[i * snbytes]
|
||||
|
||||
.text
|
||||
.globl load_aos
|
||||
.type load_aos, @function
|
||||
load_aos :
|
||||
|
||||
vmovsd xmm0, QWORD PTR [rdi]
|
||||
vmovsd xmm1, QWORD PTR [8 + rdi]
|
||||
vmovsd xmm2, QWORD PTR [16 + rdi]
|
||||
|
||||
vbroadcastsd zmm3, xmm0
|
||||
vbroadcastsd zmm4, xmm1
|
||||
vbroadcastsd zmm5, xmm2
|
||||
|
||||
ret
|
||||
.size load_aos, .-load_aos
|
32
util/gather-bench/src/includes/allocate.h
Normal file
32
util/gather-bench/src/includes/allocate.h
Normal file
@@ -0,0 +1,32 @@
|
||||
/*
|
||||
* =======================================================================================
|
||||
*
|
||||
* Author: Jan Eitzinger (je), jan.eitzinger@fau.de
|
||||
* Copyright (c) 2020 RRZE, University Erlangen-Nuremberg
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in all
|
||||
* copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
* =======================================================================================
|
||||
*/
|
||||
/* Include guard. Identifiers containing a double underscore are reserved for
 * the implementation, so a plain name is used. */
#ifndef ALLOCATE_H_
#define ALLOCATE_H_

#include <stddef.h> /* size_t, so the header is self-contained */

/*
 * Allocate `bytesize` bytes aligned to `alignment` bytes.
 * The implementation prints an error and exits the process on failure.
 */
extern void* allocate (int alignment, size_t bytesize);

#endif /* ALLOCATE_H_ */
|
53
util/gather-bench/src/includes/likwid-marker.h
Normal file
53
util/gather-bench/src/includes/likwid-marker.h
Normal file
@@ -0,0 +1,53 @@
|
||||
/*
|
||||
* =======================================================================================
|
||||
*
|
||||
* Author: Jan Eitzinger (je), jan.eitzinger@fau.de
|
||||
* Copyright (c) 2020 RRZE, University Erlangen-Nuremberg
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in all
|
||||
* copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
* =======================================================================================
|
||||
*/
|
||||
#ifndef LIKWID_MARKERS_H
#define LIKWID_MARKERS_H

/*
 * Convenience wrappers around the LIKWID marker API.
 *
 * With LIKWID_PERFMON defined, every macro forwards to the likwid_marker*
 * function of the same purpose declared in <likwid.h>. Without it, every
 * macro expands to nothing, so instrumented code still builds when LIKWID
 * is not available.
 */
#if defined(LIKWID_PERFMON)

#include <likwid.h>

#define LIKWID_MARKER_INIT                  likwid_markerInit()
#define LIKWID_MARKER_THREADINIT            likwid_markerThreadInit()
#define LIKWID_MARKER_SWITCH                likwid_markerNextGroup()
#define LIKWID_MARKER_REGISTER(regionTag)   likwid_markerRegisterRegion(regionTag)
#define LIKWID_MARKER_START(regionTag)      likwid_markerStartRegion(regionTag)
#define LIKWID_MARKER_STOP(regionTag)       likwid_markerStopRegion(regionTag)
#define LIKWID_MARKER_CLOSE                 likwid_markerClose()
#define LIKWID_MARKER_RESET(regionTag)      likwid_markerResetRegion(regionTag)
#define LIKWID_MARKER_GET(regionTag, nevents, events, time, count) likwid_markerGetRegion(regionTag, nevents, events, time, count)

#else /* !LIKWID_PERFMON: all markers compile away */

#define LIKWID_MARKER_INIT
#define LIKWID_MARKER_THREADINIT
#define LIKWID_MARKER_SWITCH
#define LIKWID_MARKER_REGISTER(regionTag)
#define LIKWID_MARKER_START(regionTag)
#define LIKWID_MARKER_STOP(regionTag)
#define LIKWID_MARKER_CLOSE
#define LIKWID_MARKER_GET(regionTag, nevents, events, time, count)
#define LIKWID_MARKER_RESET(regionTag)

#endif /* LIKWID_PERFMON */

#endif /* LIKWID_MARKERS_H */
|
34
util/gather-bench/src/includes/timing.h
Normal file
34
util/gather-bench/src/includes/timing.h
Normal file
@@ -0,0 +1,34 @@
|
||||
/*
|
||||
* =======================================================================================
|
||||
*
|
||||
* Author: Jan Eitzinger (je), jan.eitzinger@fau.de
|
||||
* Copyright (c) 2020 RRZE, University Erlangen-Nuremberg
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in all
|
||||
* copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
* =======================================================================================
|
||||
*/
|
||||
#ifndef __TIMING_H_
#define __TIMING_H_

/*
 * Timing helpers; the definitions live outside this header.
 *
 * The parameter lists are written as (void): before C23 an empty list "()"
 * declares a function WITHOUT a prototype, so a call with stray arguments
 * would not be diagnosed.
 */

/* Returns a timestamp as a double. The benchmarks subtract two stamps to get
 * an elapsed time (presumably in seconds -- confirm against the definition). */
extern double getTimeStamp(void);

/* NOTE(review): presumably the resolution of the underlying clock -- confirm. */
extern double getTimeResolution(void);

/* NOTE(review): trailing underscore suggests a Fortran-callable alias of
 * getTimeStamp -- confirm against the definition. */
extern double getTimeStamp_(void);

#endif
|
441
util/gather-bench/src/main-md-trace.c
Normal file
441
util/gather-bench/src/main-md-trace.c
Normal file
@@ -0,0 +1,441 @@
|
||||
/*
|
||||
* =======================================================================================
|
||||
*
|
||||
* Author: Jan Eitzinger (je), jan.eitzinger@fau.de
|
||||
* Copyright (c) 2021 RRZE, University Erlangen-Nuremberg
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in all
|
||||
* copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
* =======================================================================================
|
||||
*/
|
||||
#include <float.h>
|
||||
#include <getopt.h>
|
||||
#include <limits.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <unistd.h>
|
||||
#include <x86intrin.h>
|
||||
//---
|
||||
#include <likwid-marker.h>
|
||||
//---
|
||||
#include <allocate.h>
|
||||
#include <timing.h>
|
||||
|
||||
/* ---- Build-configuration sanity checks ---------------------------------- */
#if !(defined(ISA_avx2) || defined(ISA_avx512))
#error "Invalid ISA macro, possible values are: avx2 and avx512"
#endif

#if defined(TEST) && defined(ONLY_FIRST_DIMENSION)
#error "TEST and ONLY_FIRST_DIMENSION options are mutually exclusive!"
#endif

#define HLINE "----------------------------------------------------------------------------\n"

/* ---- Small arithmetic helpers (arguments are evaluated more than once) --- */
#if !defined(MIN)
#define MIN(x,y) ((x)<(y)?(x):(y))
#endif
#if !defined(MAX)
#define MAX(x,y) ((x)>(y)?(x):(y))
#endif
#if !defined(ABS)
#define ABS(a) ((a) >= 0 ? (a) : -(a))
#endif

/* Alignment passed to allocate() for every array of the benchmark. */
#define ARRAY_ALIGNMENT 64

/* ---- ISA selection: vector length in elements and the name that is printed */
#if defined(ISA_avx512)
#define _VL_ 8
#define ISA_STRING "avx512"
#else
#define _VL_ 4
#define ISA_STRING "avx2"
#endif

/* ---- Data layout selection: kernel entry points and the name printed ----- */
#if defined(AOS)
#define GATHER gather_md_aos
#define LOAD(a, i, d, n) load_aos(&a[i * d])
#define LAYOUT_STRING "AoS"
#else
#define GATHER gather_md_soa
#define LOAD(a, i, d, n) load_soa(a, i, n)
#define LAYOUT_STRING "SoA"
#endif

/* Despite the name this is added to the per-element count of doubles
 * (dims + PADDING_BYTES), i.e. it is one extra double, only for padded AoS. */
#if defined(PADDING) && defined(AOS)
#define PADDING_BYTES 1
#else
#define PADDING_BYTES 0
#endif

/* ---- Optional memory tracer -----------------------------------------------
 * When enabled, MEM_TRACER_INIT declares the local `mem_tracer_fp` that the
 * other two macros use. When disabled, MEM_TRACER_INIT is an object-like
 * macro expanding to nothing. */
#if defined(MEM_TRACER)
# define MEM_TRACER_INIT(trace_file) FILE *mem_tracer_fp = fopen(get_mem_tracer_filename(trace_file), "w");
# define MEM_TRACER_END fclose(mem_tracer_fp);
# define MEM_TRACE(addr, op) fprintf(mem_tracer_fp, "%c: %p\n", op, (void *)(&(addr)));
#else
# define MEM_TRACER_INIT
# define MEM_TRACER_END
# define MEM_TRACE(addr, op)
#endif

/* Kernels defined outside this file (presumably in assembly -- confirm).
 * main() calls them through GATHER / LOAD. */
int gather_md_aos(double*, int*, int, double*, int);
int gather_md_soa(double*, int*, int, double*, int);
void load_aos(double*);
void load_soa(double*, int, int);
|
||||
|
||||
/*
 * Builds the memory-tracer output file name "mem_tracer_<trace_file>.txt".
 *
 * The name is written into a 64-byte function-local static buffer, so the
 * returned pointer is overwritten by the next call and snprintf truncates
 * names that do not fit.
 */
const char *get_mem_tracer_filename(const char *trace_file) {
    static char tracer_path[64];

    (void) snprintf(tracer_path, sizeof(tracer_path), "mem_tracer_%s.txt", trace_file);
    return &tracer_path[0];
}
|
||||
|
||||
/*
 * Integer base-2 logarithm, rounded down: the position of the highest set
 * bit of x. Returns 0 for both x == 0 and x == 1.
 */
int log2_uint(unsigned int x) {
    int bits = 0;

    for(x >>= 1; x != 0; x >>= 1) {
        ++bits;
    }

    return bits;
}
|
||||
|
||||
int main (int argc, char** argv) {
|
||||
LIKWID_MARKER_INIT;
|
||||
LIKWID_MARKER_REGISTER("gather");
|
||||
char *trace_file = NULL;
|
||||
int cl_size = 64;
|
||||
int ntimesteps = 200;
|
||||
int reneigh_every = 20;
|
||||
int opt = 0;
|
||||
double freq = 2.5;
|
||||
struct option long_opts[] = {
|
||||
{"trace" , required_argument, NULL, 't'},
|
||||
{"freq", required_argument, NULL, 'f'},
|
||||
{"line", required_argument, NULL, 'l'},
|
||||
{"timesteps", required_argument, NULL, 'n'},
|
||||
{"reneigh", required_argument, NULL, 'r'},
|
||||
{"help", required_argument, NULL, 'h'}
|
||||
};
|
||||
|
||||
while((opt = getopt_long(argc, argv, "t:f:l:n:r:h", long_opts, NULL)) != -1) {
|
||||
switch(opt) {
|
||||
case 't':
|
||||
trace_file = strdup(optarg);
|
||||
break;
|
||||
|
||||
case 'f':
|
||||
freq = atof(optarg);
|
||||
break;
|
||||
|
||||
case 'l':
|
||||
cl_size = atoi(optarg);
|
||||
break;
|
||||
|
||||
case 'n':
|
||||
ntimesteps = atoi(optarg);
|
||||
break;
|
||||
|
||||
case 'r':
|
||||
reneigh_every = atoi(optarg);
|
||||
break;
|
||||
|
||||
case 'h':
|
||||
case '?':
|
||||
default:
|
||||
printf("Usage: %s [OPTION]...\n", argv[0]);
|
||||
printf("MD variant for gather benchmark.\n\n");
|
||||
printf("Mandatory arguments to long options are also mandatory for short options.\n");
|
||||
printf("\t-t, --trace=STRING input file with traced indexes from MD-Bench.\n");
|
||||
printf("\t-f, --freq=REAL CPU frequency in GHz (default 2.5).\n");
|
||||
printf("\t-l, --line=NUMBER cache line size in bytes (default 64).\n");
|
||||
printf("\t-n, --timesteps=NUMBER number of timesteps to simulate (default 200).\n");
|
||||
printf("\t-r, --reneigh=NUMBER reneighboring frequency in timesteps (default 20).\n");
|
||||
printf("\t-h, --help display this help message.\n");
|
||||
printf("\n\n");
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
}
|
||||
|
||||
if(trace_file == NULL) {
|
||||
fprintf(stderr, "Trace file not specified!\n");
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
|
||||
FILE *fp;
|
||||
char *line = NULL;
|
||||
int *neighborlists = NULL;
|
||||
int *numneighs = NULL;
|
||||
int atom = -1;
|
||||
int nlocal, nghost, maxneighs;
|
||||
int nall = 0;
|
||||
int N_alloc = 0;
|
||||
size_t ntest = 0;
|
||||
size_t llen;
|
||||
ssize_t read;
|
||||
double *a = NULL;
|
||||
double *f = NULL;
|
||||
double *t = NULL;
|
||||
double time = 0.0;
|
||||
double E, S;
|
||||
const int dims = 3;
|
||||
const int snbytes = dims + PADDING_BYTES; // bytes per element (struct), includes padding
|
||||
long long int niters = 0;
|
||||
long long int ngathered = 0;
|
||||
|
||||
printf("ISA,Layout,Dims,Frequency (GHz),Cache Line Size (B),Vector Width (e)\n");
|
||||
printf("%s,%s,%d,%f,%d,%d\n\n", ISA_STRING, LAYOUT_STRING, dims, freq, cl_size, _VL_);
|
||||
freq = freq * 1e9;
|
||||
|
||||
#ifdef ONLY_FIRST_DIMENSION
|
||||
const int gathered_dims = 1;
|
||||
#else
|
||||
const int gathered_dims = dims;
|
||||
#endif
|
||||
|
||||
for(int ts = -1; ts < ntimesteps; ts++) {
|
||||
if(!((ts + 1) % reneigh_every)) {
|
||||
char ts_trace_file[128];
|
||||
snprintf(ts_trace_file, sizeof ts_trace_file, "%s_%d.out", trace_file, ts + 1);
|
||||
if((fp = fopen(ts_trace_file, "r")) == NULL) {
|
||||
fprintf(stderr, "Error: could not open trace file!\n");
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
|
||||
while((read = getline(&line, &llen, fp)) != -1) {
|
||||
int i = 2;
|
||||
if(strncmp(line, "N:", 2) == 0) {
|
||||
while(line[i] == ' ') { i++; }
|
||||
nlocal = atoi(strtok(&line[i], " "));
|
||||
nghost = atoi(strtok(NULL, " "));
|
||||
nall = nlocal + nghost;
|
||||
maxneighs = atoi(strtok(NULL, " "));
|
||||
|
||||
if(nlocal <= 0 || maxneighs <= 0) {
|
||||
fprintf(stderr, "Number of local atoms and neighbor lists capacity cannot be less or equal than zero!\n");
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
|
||||
if(neighborlists == NULL) {
|
||||
neighborlists = (int *) allocate( ARRAY_ALIGNMENT, nlocal * maxneighs * sizeof(int) );
|
||||
numneighs = (int *) allocate( ARRAY_ALIGNMENT, nlocal * sizeof(int) );
|
||||
}
|
||||
}
|
||||
|
||||
if(strncmp(line, "A:", 2) == 0) {
|
||||
while(line[i] == ' ') { i++; }
|
||||
atom = atoi(strtok(&line[i], " "));
|
||||
numneighs[atom] = 0;
|
||||
}
|
||||
|
||||
if(strncmp(line, "I:", 2) == 0) {
|
||||
while(line[i] == ' ') { i++; }
|
||||
char *neigh_idx = strtok(&line[i], " ");
|
||||
|
||||
while(neigh_idx != NULL && *neigh_idx != '\n') {
|
||||
int j = numneighs[atom];
|
||||
neighborlists[atom * maxneighs + j] = atoi(neigh_idx);
|
||||
numneighs[atom]++;
|
||||
ntest++;
|
||||
neigh_idx = strtok(NULL, " ");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fclose(fp);
|
||||
}
|
||||
|
||||
if(N_alloc == 0) {
|
||||
N_alloc = nall * 2;
|
||||
a = (double*) allocate( ARRAY_ALIGNMENT, N_alloc * snbytes * sizeof(double) );
|
||||
f = (double*) allocate( ARRAY_ALIGNMENT, N_alloc * dims * sizeof(double) );
|
||||
}
|
||||
|
||||
#ifdef TEST
|
||||
if(t != NULL) { free(t); }
|
||||
ntest += 100;
|
||||
t = (double*) allocate( ARRAY_ALIGNMENT, ntest * dims * sizeof(double) );
|
||||
#endif
|
||||
|
||||
for(int i = 0; i < N_alloc; ++i) {
|
||||
#ifdef AOS
|
||||
a[i * snbytes + 0] = i * dims + 0;
|
||||
a[i * snbytes + 1] = i * dims + 1;
|
||||
a[i * snbytes + 2] = i * dims + 2;
|
||||
#else
|
||||
a[N * 0 + i] = N * 0 + i;
|
||||
a[N * 1 + i] = N * 1 + i;
|
||||
a[N * 2 + i] = N * 2 + i;
|
||||
#endif
|
||||
f[i * dims + 0] = 0.0;
|
||||
f[i * dims + 1] = 0.0;
|
||||
f[i * dims + 2] = 0.0;
|
||||
}
|
||||
|
||||
int t_idx = 0;
|
||||
S = getTimeStamp();
|
||||
LIKWID_MARKER_START("gather");
|
||||
for(int i = 0; i < nlocal; i++) {
|
||||
int *neighbors = &neighborlists[i * maxneighs];
|
||||
// We inline the assembly for AVX512 with AoS layout to evaluate the impact
|
||||
// of calling external assembly procedures in the overall runtime
|
||||
#ifdef ISA_avx512
|
||||
__m256i ymm_reg_mask = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
|
||||
__asm__ __volatile__( "vmovsd 0(%0), %%xmm3;"
|
||||
"vmovsd 8(%0), %%xmm4;"
|
||||
"vmovsd 16(%0), %%xmm5;"
|
||||
"vbroadcastsd %%xmm3, %%zmm0;"
|
||||
"vbroadcastsd %%xmm4, %%zmm1;"
|
||||
"vbroadcastsd %%xmm5, %%zmm2;"
|
||||
:
|
||||
: "r" (&a[i * snbytes])
|
||||
: "%xmm3", "%xmm4", "%xmm5", "%zmm0", "%zmm1", "%zmm2" );
|
||||
|
||||
__asm__ __volatile__( "xor %%rax, %%rax;"
|
||||
"movq %%rdx, %%r15;"
|
||||
"1: vmovdqu (%1,%%rax,4), %%ymm3;"
|
||||
"vpaddd %%ymm3, %%ymm3, %%ymm4;"
|
||||
#ifdef PADDING
|
||||
"vpaddd %%ymm4, %%ymm4, %%ymm3;"
|
||||
#else
|
||||
"vpaddd %%ymm3, %%ymm4, %%ymm3;"
|
||||
#endif
|
||||
"vpcmpeqb %%xmm5, %%xmm5, %%k1;"
|
||||
"vpcmpeqb %%xmm5, %%xmm5, %%k2;"
|
||||
"vpcmpeqb %%xmm5, %%xmm5, %%k3;"
|
||||
"vpxord %%zmm0, %%zmm0, %%zmm0;"
|
||||
"vpxord %%zmm1, %%zmm1, %%zmm1;"
|
||||
"vpxord %%zmm2, %%zmm2, %%zmm2;"
|
||||
"vgatherdpd (%3, %%ymm3, 8), %%zmm0{{%%k1}};"
|
||||
"vgatherdpd 8(%3, %%ymm3, 8), %%zmm1{{%%k2}};"
|
||||
"vgatherdpd 16(%3, %%ymm3, 8), %%zmm2{{%%k3}};"
|
||||
"addq $8, %%rax;"
|
||||
"subq $8, %%r15;"
|
||||
"cmpq $8, %%r15;"
|
||||
"jge 1b;"
|
||||
"cmpq $0, %%r15;"
|
||||
"jle 2;"
|
||||
"vpbroadcastd %%r15d, %%ymm5;"
|
||||
"vpcmpgtd %%ymm5, %2, %%k1;"
|
||||
"vmovdqu32 (%1,%%rax,4), %%ymm3{{%%k1}}{{z}};"
|
||||
"vpaddd %%ymm3, %%ymm3, %%ymm4;"
|
||||
#ifdef PADDING
|
||||
"vpaddd %%ymm4, %%ymm4, %%ymm3;"
|
||||
#else
|
||||
"vpaddd %%ymm3, %%ymm4, %%ymm3;"
|
||||
#endif
|
||||
"vpxord %%zmm0, %%zmm0, %%zmm0;"
|
||||
"kmovw %%k1, %%k2;"
|
||||
"kmovw %%k1, %%k3;"
|
||||
"vpxord %%zmm1, %%zmm1, %%zmm1;"
|
||||
"vpxord %%zmm2, %%zmm2, %%zmm2;"
|
||||
"vgatherdpd (%3, %%ymm3, 8), %%zmm0{{%%k1}};"
|
||||
"vgatherdpd 8(%3, %%ymm3, 8), %%zmm1{{%%k2}};"
|
||||
"vgatherdpd 16(%3, %%ymm3, 8), %%zmm2{{%%k3}};"
|
||||
"addq %%r15, %%rax;"
|
||||
"2:;"
|
||||
:
|
||||
: "d" (numneighs[i]), "r" (neighbors), "x" (ymm_reg_mask), "r" (a)
|
||||
: "%rax", "%r15", "%ymm3", "%ymm4", "%ymm5", "%k1", "%k2", "%k3", "%zmm0", "%zmm1", "%zmm2" );
|
||||
#else
|
||||
LOAD(a, i, snbytes, N_alloc);
|
||||
t_idx += GATHER(a, neighbors, numneighs[i], &t[t_idx], ntest);
|
||||
#endif
|
||||
f[i * dims + 0] += i;
|
||||
f[i * dims + 1] += i;
|
||||
f[i * dims + 2] += i;
|
||||
}
|
||||
LIKWID_MARKER_STOP("gather");
|
||||
E = getTimeStamp();
|
||||
time += E - S;
|
||||
|
||||
#ifdef MEM_TRACER
|
||||
MEM_TRACER_INIT(trace_file);
|
||||
for(int i = 0; i < nlocal; i++) {
|
||||
int *neighbors = &neighborlists[i * maxneighs];
|
||||
|
||||
for(int d = 0; d < gathered_dims; d++) {
|
||||
#ifdef AOS
|
||||
MEM_TRACE('R', a[i * snbytes + d])
|
||||
#else
|
||||
MEM_TRACE('R', a[d * N + i])
|
||||
#endif
|
||||
}
|
||||
|
||||
for(int j = 0; j < numneighs[i]; j += _VL_) {
|
||||
for(int jj = j; jj < MIN(j + _VL_, numneighs[i]); j++) {
|
||||
int k = neighbors[jj];
|
||||
for(int d = 0; d < gathered_dims; d++) {
|
||||
#ifdef AOS
|
||||
MEM_TRACE('R', a[k * snbytes + d])
|
||||
#else
|
||||
MEM_TRACE('R', a[d * N + k])
|
||||
#endif
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
MEM_TRACER_END;
|
||||
#endif
|
||||
|
||||
#ifdef TEST
|
||||
int test_failed = 0;
|
||||
t_idx = 0;
|
||||
for(int i = 0; i < nlocal; ++i) {
|
||||
int *neighbors = &neighborlists[i * maxneighs];
|
||||
for(int j = 0; j < numneighs[i]; ++j) {
|
||||
int k = neighbors[j];
|
||||
for(int d = 0; d < dims; ++d) {
|
||||
#ifdef AOS
|
||||
if(t[d * ntest + t_idx] != k * dims + d) {
|
||||
#else
|
||||
if(t[d * ntest + t_idx] != d * N + k) {
|
||||
#endif
|
||||
test_failed = 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
t_idx++;
|
||||
}
|
||||
}
|
||||
|
||||
if(test_failed) {
|
||||
printf("Test failed!\n");
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
#endif
|
||||
|
||||
for(int i = 0; i < nlocal; i++) {
|
||||
niters += (numneighs[i] / _VL_) + ((numneighs[i] % _VL_ == 0) ? 0 : 1);
|
||||
ngathered += numneighs[i];
|
||||
}
|
||||
}
|
||||
|
||||
printf("%14s,%14s,%14s,%14s,%14s,%14s", "tot. time(s)", "time/step(ms)", "time/iter(us)", "cy/it", "cy/gather", "cy/elem");
|
||||
printf("\n");
|
||||
const double time_per_step = time * 1e3 / ((double) ntimesteps);
|
||||
const double time_per_it = time * 1e6 / ((double) niters);
|
||||
const double cy_per_it = time * freq * _VL_ / ((double) niters);
|
||||
const double cy_per_gather = time * freq * _VL_ / ((double) niters * gathered_dims);
|
||||
const double cy_per_elem = time * freq / ((double) ngathered * gathered_dims);
|
||||
printf("%14.6f,%14.6f,%14.6f,%14.6f,%14.6f,%14.6f\n", time, time_per_step, time_per_it, cy_per_it, cy_per_gather, cy_per_elem);
|
||||
|
||||
#ifdef TEST
|
||||
printf("Test passed!\n");
|
||||
#endif
|
||||
|
||||
LIKWID_MARKER_CLOSE;
|
||||
return EXIT_SUCCESS;
|
||||
}
|
361
util/gather-bench/src/main-md.c
Normal file
361
util/gather-bench/src/main-md.c
Normal file
@@ -0,0 +1,361 @@
|
||||
/*
|
||||
* =======================================================================================
|
||||
*
|
||||
* Author: Jan Eitzinger (je), jan.eitzinger@fau.de
|
||||
* Copyright (c) 2021 RRZE, University Erlangen-Nuremberg
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in all
|
||||
* copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
* =======================================================================================
|
||||
*/
|
||||
#include <float.h>
|
||||
#include <getopt.h>
|
||||
#include <limits.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <unistd.h>
|
||||
//---
|
||||
#include <likwid-marker.h>
|
||||
//---
|
||||
#include <allocate.h>
|
||||
#include <timing.h>
|
||||
|
||||
/* ---- Build-configuration sanity checks ---------------------------------- */
#if !(defined(ISA_avx2) || defined(ISA_avx512))
#error "Invalid ISA macro, possible values are: avx2 and avx512"
#endif

#if defined(TEST) && defined(ONLY_FIRST_DIMENSION)
#error "TEST and ONLY_FIRST_DIMENSION options are mutually exclusive!"
#endif

#define HLINE "----------------------------------------------------------------------------\n"

/* ---- Small arithmetic helpers (arguments are evaluated more than once) --- */
#if !defined(MIN)
#define MIN(x,y) ((x)<(y)?(x):(y))
#endif
#if !defined(MAX)
#define MAX(x,y) ((x)>(y)?(x):(y))
#endif
#if !defined(ABS)
#define ABS(a) ((a) >= 0 ? (a) : -(a))
#endif

/* Alignment passed to allocate() for every array of the benchmark. */
#define ARRAY_ALIGNMENT 64
#define SIZE 20000

/* ---- ISA selection: vector length in elements and the name that is printed */
#if defined(ISA_avx512)
#define _VL_ 8
#define ISA_STRING "avx512"
#else
#define _VL_ 4
#define ISA_STRING "avx2"
#endif

/* ---- Data layout selection: kernel entry point and the name printed ------ */
#if defined(AOS)
#define GATHER gather_aos
#define LAYOUT_STRING "AoS"
#else
#define GATHER gather_soa
#define LAYOUT_STRING "SoA"
#endif

/* Despite the name this is added to the per-element count of doubles
 * (dims + PADDING_BYTES), i.e. it is one extra double, only for padded AoS. */
#if defined(PADDING) && defined(AOS)
#define PADDING_BYTES 1
#else
#define PADDING_BYTES 0
#endif

/* ---- Optional memory tracer -----------------------------------------------
 * When enabled, MEM_TRACER_INIT declares the local `mem_tracer_fp` that the
 * other two macros use. When disabled, MEM_TRACER_INIT is an object-like
 * macro expanding to nothing, so a call site's parenthesized argument list
 * is left behind as a plain expression. */
#if defined(MEM_TRACER)
# define MEM_TRACER_INIT(stride, size) FILE *mem_tracer_fp = fopen(get_mem_tracer_filename(stride, size), "w");
# define MEM_TRACER_END fclose(mem_tracer_fp);
# define MEM_TRACE(addr, op) fprintf(mem_tracer_fp, "%c: %p\n", op, (void *)(&(addr)));
#else
# define MEM_TRACER_INIT
# define MEM_TRACER_END
# define MEM_TRACE(addr, op)
#endif

/* Kernels defined outside this file (presumably in assembly -- confirm).
 * main() calls them through GATHER. */
extern void gather_aos(double*, int*, int, double*, long int*);
extern void gather_soa(double*, int*, int, double*, long int*);
|
||||
|
||||
/*
 * Builds the memory-tracer output file name "mem_tracer_<stride>_<size>.txt".
 *
 * The name is written into a 64-byte function-local static buffer, so the
 * returned pointer is overwritten by the next call.
 */
const char *get_mem_tracer_filename(int stride, int size) {
    static char tracer_path[64];

    (void) snprintf(tracer_path, sizeof(tracer_path), "mem_tracer_%d_%d.txt", stride, size);
    return &tracer_path[0];
}
|
||||
|
||||
/*
 * Integer base-2 logarithm, rounded down: the position of the highest set
 * bit of x. Returns 0 for both x == 0 and x == 1.
 */
int log2_uint(unsigned int x) {
    int bits = 0;

    for(x >>= 1; x != 0; x >>= 1) {
        ++bits;
    }

    return bits;
}
|
||||
|
||||
int main (int argc, char** argv) {
|
||||
LIKWID_MARKER_INIT;
|
||||
LIKWID_MARKER_REGISTER("gather");
|
||||
int stride = 1;
|
||||
int cl_size = 64;
|
||||
int opt = 0;
|
||||
double freq = 2.5;
|
||||
struct option long_opts[] = {
|
||||
{"stride", required_argument, NULL, 's'},
|
||||
{"freq", required_argument, NULL, 'f'},
|
||||
{"line", required_argument, NULL, 'l'},
|
||||
{"help", required_argument, NULL, 'h'}
|
||||
};
|
||||
|
||||
while((opt = getopt_long(argc, argv, "s:f:l:h", long_opts, NULL)) != -1) {
|
||||
switch(opt) {
|
||||
case 's':
|
||||
stride = atoi(optarg);
|
||||
break;
|
||||
|
||||
case 'f':
|
||||
freq = atof(optarg);
|
||||
break;
|
||||
|
||||
case 'l':
|
||||
cl_size = atoi(optarg);
|
||||
break;
|
||||
|
||||
case 'h':
|
||||
case '?':
|
||||
default:
|
||||
printf("Usage: %s [OPTION]...\n", argv[0]);
|
||||
printf("MD variant for gather benchmark.\n\n");
|
||||
printf("Mandatory arguments to long options are also mandatory for short options.\n");
|
||||
printf("\t-s, --stride=NUMBER stride between two successive elements (default 1).\n");
|
||||
printf("\t-f, --freq=REAL CPU frequency in GHz (default 2.5).\n");
|
||||
printf("\t-l, --line=NUMBER cache line size in bytes (default 64).\n");
|
||||
printf("\t-h, --help display this help message.\n");
|
||||
printf("\n\n");
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
}
|
||||
|
||||
size_t bytesPerWord = sizeof(double);
|
||||
const int dims = 3;
|
||||
const int snbytes = dims + PADDING_BYTES; // bytes per element (struct), includes padding
|
||||
#ifdef AOS
|
||||
size_t cacheLinesPerGather = MIN(MAX(stride * _VL_ * snbytes / (cl_size / sizeof(double)), 1), _VL_);
|
||||
#else
|
||||
size_t cacheLinesPerGather = MIN(MAX(stride * _VL_ / (cl_size / sizeof(double)), 1), _VL_) * dims;
|
||||
#endif
|
||||
size_t N = SIZE;
|
||||
double E, S;
|
||||
|
||||
printf("ISA,Layout,Stride,Dims,Frequency (GHz),Cache Line Size (B),Vector Width (e),Cache Lines/Gather\n");
|
||||
printf("%s,%s,%d,%d,%f,%d,%d,%lu\n\n", ISA_STRING, LAYOUT_STRING, stride, dims, freq, cl_size, _VL_, cacheLinesPerGather);
|
||||
printf("%14s,%14s,%14s,", "N", "Size(kB)", "cut CLs");
|
||||
|
||||
#ifndef MEASURE_GATHER_CYCLES
|
||||
printf("%14s,%14s,%14s,%14s,%14s", "tot. time", "time/LUP(ms)", "cy/it", "cy/gather", "cy/elem");
|
||||
#else
|
||||
|
||||
#ifdef ONLY_FIRST_DIMENSION
|
||||
printf("%27s,%27s,%27s", "min/max/avg cy(x)", "min/max/avg cy(y)", "min/max/avg cy(z)");
|
||||
#else
|
||||
printf("%27s", "min/max/avg cy(x)");
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
printf("\n");
|
||||
freq = freq * 1e9;
|
||||
|
||||
for(int N = 512; N < 80000000; N = 1.5 * N) {
|
||||
// Currently this only works when the array size (in elements) is multiple of the vector length (no preamble and prelude)
|
||||
if(N % _VL_ != 0) {
|
||||
N += _VL_ - (N % _VL_);
|
||||
}
|
||||
|
||||
MEM_TRACER_INIT(stride, N);
|
||||
|
||||
int N_gathers_per_dim = N / _VL_;
|
||||
int N_alloc = N * 2;
|
||||
int N_cycles_alloc = N_gathers_per_dim * 2;
|
||||
int cut_cl = 0;
|
||||
double* a = (double*) allocate( ARRAY_ALIGNMENT, N_alloc * snbytes * sizeof(double) );
|
||||
int* idx = (int*) allocate( ARRAY_ALIGNMENT, N_alloc * sizeof(int) );
|
||||
int rep;
|
||||
double time;
|
||||
|
||||
#ifdef TEST
|
||||
double* t = (double*) allocate( ARRAY_ALIGNMENT, N_alloc * dims * sizeof(double) );
|
||||
#else
|
||||
double* t = (double*) NULL;
|
||||
#endif
|
||||
|
||||
#ifdef MEASURE_GATHER_CYCLES
|
||||
long int* cycles = (long int*) allocate( ARRAY_ALIGNMENT, N_cycles_alloc * dims * sizeof(long int)) ;
|
||||
#else
|
||||
long int* cycles = (long int*) NULL;
|
||||
#endif
|
||||
|
||||
for(int i = 0; i < N_alloc; ++i) {
|
||||
#ifdef AOS
|
||||
a[i * snbytes + 0] = i * dims + 0;
|
||||
a[i * snbytes + 1] = i * dims + 1;
|
||||
a[i * snbytes + 2] = i * dims + 2;
|
||||
#else
|
||||
a[N * 0 + i] = N * 0 + i;
|
||||
a[N * 1 + i] = N * 1 + i;
|
||||
a[N * 2 + i] = N * 2 + i;
|
||||
#endif
|
||||
idx[i] = (i * stride) % N;
|
||||
}
|
||||
|
||||
#ifdef ONLY_FIRST_DIMENSION
|
||||
const int gathered_dims = 1;
|
||||
#else
|
||||
const int gathered_dims = dims;
|
||||
#endif
|
||||
|
||||
#ifdef MEM_TRACER
|
||||
for(int i = 0; i < N; i += _VL_) {
|
||||
for(int j = 0; j < _VL_; j++) {
|
||||
MEM_TRACE(idx[i + j], 'R');
|
||||
}
|
||||
|
||||
for(int d = 0; d < gathered_dims; d++) {
|
||||
for(int j = 0; j < _VL_; j++) {
|
||||
#ifdef AOS
|
||||
MEM_TRACE(a[idx[i + j] * snbytes + d], 'R');
|
||||
#else
|
||||
MEM_TRACE(a[N * d + idx[i + j]], 'R');
|
||||
#endif
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef AOS
|
||||
const int cl_shift = log2_uint((unsigned int) cl_size);
|
||||
for(int i = 0; i < N; i++) {
|
||||
const int first_cl = (idx[i] * snbytes * sizeof(double)) >> cl_shift;
|
||||
const int last_cl = ((idx[i] * snbytes + gathered_dims - 1) * sizeof(double)) >> cl_shift;
|
||||
if(first_cl != last_cl) {
|
||||
cut_cl++;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
S = getTimeStamp();
|
||||
for(int r = 0; r < 100; ++r) {
|
||||
GATHER(a, idx, N, t, cycles);
|
||||
}
|
||||
E = getTimeStamp();
|
||||
|
||||
#ifdef MEASURE_GATHER_CYCLES
|
||||
for(int i = 0; i < N_cycles_alloc; i++) {
|
||||
cycles[i * 3 + 0] = 0;
|
||||
cycles[i * 3 + 1] = 0;
|
||||
cycles[i * 3 + 2] = 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
rep = 100 * (0.5 / (E - S));
|
||||
S = getTimeStamp();
|
||||
LIKWID_MARKER_START("gather");
|
||||
for(int r = 0; r < rep; ++r) {
|
||||
GATHER(a, idx, N, t, cycles);
|
||||
}
|
||||
LIKWID_MARKER_STOP("gather");
|
||||
E = getTimeStamp();
|
||||
|
||||
time = E - S;
|
||||
|
||||
#ifdef TEST
|
||||
int test_failed = 0;
|
||||
for(int i = 0; i < N; ++i) {
|
||||
for(int d = 0; d < dims; ++d) {
|
||||
#ifdef AOS
|
||||
if(t[d * N + i] != ((i * stride) % N) * dims + d) {
|
||||
#else
|
||||
if(t[d * N + i] != d * N + ((i * stride) % N)) {
|
||||
#endif
|
||||
test_failed = 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if(test_failed) {
|
||||
printf("Test failed!\n");
|
||||
return EXIT_FAILURE;
|
||||
} else {
|
||||
printf("Test passed!\n");
|
||||
}
|
||||
#endif
|
||||
|
||||
const double size = N * (dims * sizeof(double) + sizeof(int)) / 1000.0;
|
||||
printf("%14d,%14.2f,%14d,", N, size, cut_cl);
|
||||
|
||||
#ifndef MEASURE_GATHER_CYCLES
|
||||
const double time_per_it = time * 1e6 / ((double) N * rep);
|
||||
const double cy_per_it = time * freq * _VL_ / ((double) N * rep);
|
||||
const double cy_per_gather = time * freq * _VL_ / ((double) N * rep * gathered_dims);
|
||||
const double cy_per_elem = time * freq / ((double) N * rep * gathered_dims);
|
||||
printf("%14.10f,%14.10f,%14.6f,%14.6f,%14.6f", time, time_per_it, cy_per_it, cy_per_gather, cy_per_elem);
|
||||
#else
|
||||
double cy_min[dims];
|
||||
double cy_max[dims];
|
||||
double cy_avg[dims];
|
||||
|
||||
for(int d = 0; d < dims; d++) {
|
||||
cy_min[d] = 100000.0;
|
||||
cy_max[d] = 0.0;
|
||||
cy_avg[d] = 0.0;
|
||||
}
|
||||
|
||||
for(int i = 0; i < N_gathers_per_dim; ++i) {
|
||||
for(int d = 0; d < gathered_dims; d++) {
|
||||
const double cy_d = (double)(cycles[i * 3 + d]);
|
||||
cy_min[d] = MIN(cy_min[d], cy_d);
|
||||
cy_max[d] = MAX(cy_max[d], cy_d);
|
||||
cy_avg[d] += cy_d;
|
||||
}
|
||||
}
|
||||
|
||||
for(int d = 0; d < gathered_dims; d++) {
|
||||
char tmp_str[64];
|
||||
cy_avg[d] /= (double) N_gathers_per_dim;
|
||||
snprintf(tmp_str, sizeof tmp_str, "%4.4f/%4.4f/%4.4f", cy_min[d], cy_max[d], cy_avg[d]);
|
||||
printf("%27s%c", tmp_str, (d < gathered_dims - 1) ? ',' : ' ');
|
||||
}
|
||||
#endif
|
||||
|
||||
printf("\n");
|
||||
free(a);
|
||||
free(idx);
|
||||
|
||||
#ifdef TEST
|
||||
free(t);
|
||||
#endif
|
||||
|
||||
#ifdef MEASURE_GATHER_CYCLES
|
||||
free(cycles);
|
||||
#endif
|
||||
|
||||
MEM_TRACER_END;
|
||||
}
|
||||
|
||||
LIKWID_MARKER_CLOSE;
|
||||
return EXIT_SUCCESS;
|
||||
}
|
166
util/gather-bench/src/main.c
Normal file
166
util/gather-bench/src/main.c
Normal file
@@ -0,0 +1,166 @@
|
||||
/*
|
||||
* =======================================================================================
|
||||
*
|
||||
* Author: Jan Eitzinger (je), jan.eitzinger@fau.de
|
||||
* Copyright (c) 2021 RRZE, University Erlangen-Nuremberg
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in all
|
||||
* copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
* =======================================================================================
|
||||
*/
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <unistd.h>
|
||||
#include <limits.h>
|
||||
#include <float.h>
|
||||
//---
|
||||
#include <likwid-marker.h>
|
||||
//---
|
||||
#include <timing.h>
|
||||
#include <allocate.h>
|
||||
|
||||
/* ---- Build-configuration sanity check ----------------------------------- */
#if !(defined(ISA_avx2) || defined(ISA_avx512))
#error "Invalid ISA macro, possible values are: avx2 and avx512"
#endif

#define HLINE "----------------------------------------------------------------------------\n"

/* ---- Small arithmetic helpers (arguments are evaluated more than once) --- */
#if !defined(MIN)
#define MIN(x,y) ((x)<(y)?(x):(y))
#endif
#if !defined(MAX)
#define MAX(x,y) ((x)>(y)?(x):(y))
#endif
#if !defined(ABS)
#define ABS(a) ((a) >= 0 ? (a) : -(a))
#endif

/* Alignment passed to allocate() for every array of the benchmark. */
#define ARRAY_ALIGNMENT 64
#define SIZE 20000

/* ---- ISA selection: vector length in elements and the name that is printed */
#if defined(ISA_avx512)
#define _VL_ 8
#define ISA_STRING "avx512"
#else
#define _VL_ 4
#define ISA_STRING "avx2"
#endif

/* Gather kernel defined outside this file (presumably in assembly -- confirm).
 * TEST builds pass one extra double* argument. */
#if defined(TEST)
extern void gather(double*, int*, int, double*);
#else
extern void gather(double*, int*, int);
#endif
|
||||
|
||||
int main (int argc, char** argv) {
|
||||
LIKWID_MARKER_INIT;
|
||||
LIKWID_MARKER_REGISTER("gather");
|
||||
|
||||
if (argc < 3) {
|
||||
printf("Please provide stride and frequency\n");
|
||||
printf("%s <stride> <freq (GHz)> [cache line size (B)]\n", argv[0]);
|
||||
return -1;
|
||||
}
|
||||
|
||||
int stride = atoi(argv[1]);
|
||||
double freq = atof(argv[2]);
|
||||
int cl_size = (argc == 3) ? 64 : atoi(argv[3]);
|
||||
size_t bytesPerWord = sizeof(double);
|
||||
size_t cacheLinesPerGather = MIN(MAX(stride * _VL_ / (cl_size / sizeof(double)), 1), _VL_);
|
||||
size_t N = SIZE;
|
||||
double E, S;
|
||||
|
||||
printf("ISA,Stride (elems),Frequency (GHz),Cache Line Size (B),Vector Width (elems),Cache Lines/Gather\n");
|
||||
printf("%s,%d,%f,%d,%d,%lu\n\n", ISA_STRING, stride, freq, cl_size, _VL_, cacheLinesPerGather);
|
||||
printf("%14s,%14s,%14s,%14s,%14s,%14s\n", "N", "Size(kB)", "tot. time", "time/LUP(ms)", "cy/gather", "cy/elem");
|
||||
|
||||
freq = freq * 1e9;
|
||||
for(int N = 1024; N < 400000; N = 1.5 * N) {
|
||||
int N_alloc = N * 2;
|
||||
double* a = (double*) allocate( ARRAY_ALIGNMENT, N_alloc * sizeof(double) );
|
||||
int* idx = (int*) allocate( ARRAY_ALIGNMENT, N_alloc * sizeof(int) );
|
||||
int rep;
|
||||
double time;
|
||||
|
||||
#ifdef TEST
|
||||
double* t = (double*) allocate( ARRAY_ALIGNMENT, N_alloc * sizeof(double) );
|
||||
#endif
|
||||
|
||||
for(int i = 0; i < N_alloc; ++i) {
|
||||
a[i] = i;
|
||||
idx[i] = (i * stride) % N;
|
||||
}
|
||||
|
||||
S = getTimeStamp();
|
||||
for(int r = 0; r < 100; ++r) {
|
||||
#ifdef TEST
|
||||
gather(a, idx, N, t);
|
||||
#else
|
||||
gather(a, idx, N);
|
||||
#endif
|
||||
}
|
||||
E = getTimeStamp();
|
||||
|
||||
rep = 100 * (0.5 / (E - S));
|
||||
S = getTimeStamp();
|
||||
LIKWID_MARKER_START("gather");
|
||||
for(int r = 0; r < rep; ++r) {
|
||||
#ifdef TEST
|
||||
gather(a, idx, N, t);
|
||||
#else
|
||||
gather(a, idx, N);
|
||||
#endif
|
||||
}
|
||||
LIKWID_MARKER_STOP("gather");
|
||||
E = getTimeStamp();
|
||||
|
||||
time = E - S;
|
||||
|
||||
#ifdef TEST
|
||||
int test_failed = 0;
|
||||
for(int i = 0; i < N; ++i) {
|
||||
if(t[i] != i * stride % N) {
|
||||
test_failed = 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if(test_failed) {
|
||||
printf("Test failed!\n");
|
||||
return EXIT_FAILURE;
|
||||
} else {
|
||||
printf("Test passed!\n");
|
||||
}
|
||||
#endif
|
||||
|
||||
const double size = N * (sizeof(double) + sizeof(int)) / 1000.0;
|
||||
const double time_per_it = time * 1e6 / ((double) N * rep);
|
||||
const double cy_per_gather = time * freq * _VL_ / ((double) N * rep);
|
||||
const double cy_per_elem = time * freq / ((double) N * rep);
|
||||
printf("%14d,%14.2f,%14.10f,%14.10f,%14.6f,%14.6f\n", N, size, time, time_per_it, cy_per_gather, cy_per_elem);
|
||||
free(a);
|
||||
free(idx);
|
||||
#ifdef TEST
|
||||
free(t);
|
||||
#endif
|
||||
}
|
||||
|
||||
LIKWID_MARKER_CLOSE;
|
||||
return EXIT_SUCCESS;
|
||||
}
|
47
util/gather-bench/src/timing.c
Normal file
47
util/gather-bench/src/timing.c
Normal file
@@ -0,0 +1,47 @@
|
||||
/*
|
||||
* =======================================================================================
|
||||
*
|
||||
* Author: Jan Eitzinger (je), jan.eitzinger@fau.de
|
||||
* Copyright (c) 2020 RRZE, University Erlangen-Nuremberg
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in all
|
||||
* copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
* =======================================================================================
|
||||
*/
|
||||
#include <stdlib.h>
|
||||
#include <time.h>
|
||||
|
||||
double getTimeStamp()
|
||||
{
|
||||
struct timespec ts;
|
||||
clock_gettime(CLOCK_MONOTONIC, &ts);
|
||||
return (double)ts.tv_sec + (double)ts.tv_nsec * 1.e-9;
|
||||
}
|
||||
|
||||
double getTimeResolution()
|
||||
{
|
||||
struct timespec ts;
|
||||
clock_getres(CLOCK_MONOTONIC, &ts);
|
||||
return (double)ts.tv_sec + (double)ts.tv_nsec * 1.e-9;
|
||||
}
|
||||
|
||||
/*
 * Same as getTimeStamp(), exported under a name with a trailing underscore.
 * NOTE(review): presumably for callers whose linkage appends an underscore
 * (e.g. Fortran) - confirm against the users of this symbol.
 */
double getTimeStamp_()
{
    const double now = getTimeStamp();

    return now;
}
|
Reference in New Issue
Block a user