167 lines
5.1 KiB
C

/*
* =======================================================================================
*
* Author: Jan Eitzinger (je), jan.eitzinger@fau.de
* Copyright (c) 2021 RRZE, University Erlangen-Nuremberg
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* =======================================================================================
*/
#include <stdlib.h>
#include <stdio.h>
#include <unistd.h>
#include <limits.h>
#include <float.h>
//---
#include <likwid-marker.h>
//---
#include <timing.h>
#include <allocate.h>
#if !defined(ISA_avx2) && !defined (ISA_avx512)
#error "Invalid ISA macro, possible values are: avx2 and avx512"
#endif
#define HLINE "----------------------------------------------------------------------------\n"
#ifndef MIN
#define MIN(x,y) ((x)<(y)?(x):(y))
#endif
#ifndef MAX
#define MAX(x,y) ((x)>(y)?(x):(y))
#endif
#ifndef ABS
#define ABS(a) ((a) >= 0 ? (a) : -(a))
#endif
#define ARRAY_ALIGNMENT 64
#define SIZE 20000
#ifdef ISA_avx512
#define _VL_ 8
#define ISA_STRING "avx512"
#else
#define _VL_ 4
#define ISA_STRING "avx2"
#endif
#ifdef TEST
extern void gather(double*, int*, int, double*);
#else
extern void gather(double*, int*, int);
#endif
int main (int argc, char** argv) {
LIKWID_MARKER_INIT;
LIKWID_MARKER_REGISTER("gather");
if (argc < 3) {
printf("Please provide stride and frequency\n");
printf("%s <stride> <freq (GHz)> [cache line size (B)]\n", argv[0]);
return -1;
}
int stride = atoi(argv[1]);
double freq = atof(argv[2]);
int cl_size = (argc == 3) ? 64 : atoi(argv[3]);
size_t bytesPerWord = sizeof(double);
size_t cacheLinesPerGather = MIN(MAX(stride * _VL_ / (cl_size / sizeof(double)), 1), _VL_);
size_t N = SIZE;
double E, S;
printf("ISA,Stride (elems),Frequency (GHz),Cache Line Size (B),Vector Width (elems),Cache Lines/Gather\n");
printf("%s,%d,%f,%d,%d,%lu\n\n", ISA_STRING, stride, freq, cl_size, _VL_, cacheLinesPerGather);
printf("%14s,%14s,%14s,%14s,%14s,%14s\n", "N", "Size(kB)", "tot. time", "time/LUP(ms)", "cy/gather", "cy/elem");
freq = freq * 1e9;
for(int N = 1024; N < 400000; N = 1.5 * N) {
int N_alloc = N * 2;
double* a = (double*) allocate( ARRAY_ALIGNMENT, N_alloc * sizeof(double) );
int* idx = (int*) allocate( ARRAY_ALIGNMENT, N_alloc * sizeof(int) );
int rep;
double time;
#ifdef TEST
double* t = (double*) allocate( ARRAY_ALIGNMENT, N_alloc * sizeof(double) );
#endif
for(int i = 0; i < N_alloc; ++i) {
a[i] = i;
idx[i] = (i * stride) % N;
}
S = getTimeStamp();
for(int r = 0; r < 100; ++r) {
#ifdef TEST
gather(a, idx, N, t);
#else
gather(a, idx, N);
#endif
}
E = getTimeStamp();
rep = 100 * (0.5 / (E - S));
S = getTimeStamp();
LIKWID_MARKER_START("gather");
for(int r = 0; r < rep; ++r) {
#ifdef TEST
gather(a, idx, N, t);
#else
gather(a, idx, N);
#endif
}
LIKWID_MARKER_STOP("gather");
E = getTimeStamp();
time = E - S;
#ifdef TEST
int test_failed = 0;
for(int i = 0; i < N; ++i) {
if(t[i] != i * stride % N) {
test_failed = 1;
break;
}
}
if(test_failed) {
printf("Test failed!\n");
return EXIT_FAILURE;
} else {
printf("Test passed!\n");
}
#endif
const double size = N * (sizeof(double) + sizeof(int)) / 1000.0;
const double time_per_it = time * 1e6 / ((double) N * rep);
const double cy_per_gather = time * freq * _VL_ / ((double) N * rep);
const double cy_per_elem = time * freq / ((double) N * rep);
printf("%14d,%14.2f,%14.10f,%14.10f,%14.6f,%14.6f\n", N, size, time, time_per_it, cy_per_gather, cy_per_elem);
free(a);
free(idx);
#ifdef TEST
free(t);
#endif
}
LIKWID_MARKER_CLOSE;
return EXIT_SUCCESS;
}