Compare commits
7 Commits
02629612a9
...
mucosim23
Author | SHA1 | Date | |
---|---|---|---|
|
a6a269703d | ||
|
7ee250161a | ||
|
c73efea786 | ||
|
4cfa664533 | ||
|
1837403326 | ||
|
ce00aa0042 | ||
|
c4e5e87265 |
@@ -66,7 +66,7 @@ double computeForceLJ_ref(Parameter *param, Atom *atom, Neighbor *neighbor, Stat
|
|||||||
{
|
{
|
||||||
LIKWID_MARKER_START("force");
|
LIKWID_MARKER_START("force");
|
||||||
|
|
||||||
#pragma omp for
|
#pragma omp for schedule(runtime)
|
||||||
for(int ci = 0; ci < atom->Nclusters_local; ci++) {
|
for(int ci = 0; ci < atom->Nclusters_local; ci++) {
|
||||||
int ci_cj0 = CJ0_FROM_CI(ci);
|
int ci_cj0 = CJ0_FROM_CI(ci);
|
||||||
int ci_cj1 = CJ1_FROM_CI(ci);
|
int ci_cj1 = CJ1_FROM_CI(ci);
|
||||||
@@ -213,7 +213,7 @@ double computeForceLJ_2xnn_half(Parameter *param, Atom *atom, Neighbor *neighbor
|
|||||||
#endif
|
#endif
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#pragma omp for
|
#pragma omp for schedule(runtime)
|
||||||
for(int ci = 0; ci < atom->Nclusters_local; ci++) {
|
for(int ci = 0; ci < atom->Nclusters_local; ci++) {
|
||||||
int ci_cj0 = CJ0_FROM_CI(ci);
|
int ci_cj0 = CJ0_FROM_CI(ci);
|
||||||
#if CLUSTER_M > CLUSTER_N
|
#if CLUSTER_M > CLUSTER_N
|
||||||
@@ -427,7 +427,7 @@ double computeForceLJ_2xnn_full(Parameter *param, Atom *atom, Neighbor *neighbor
|
|||||||
{
|
{
|
||||||
LIKWID_MARKER_START("force");
|
LIKWID_MARKER_START("force");
|
||||||
|
|
||||||
#pragma omp for
|
#pragma omp for schedule(runtime)
|
||||||
for(int ci = 0; ci < atom->Nclusters_local; ci++) {
|
for(int ci = 0; ci < atom->Nclusters_local; ci++) {
|
||||||
int ci_cj0 = CJ0_FROM_CI(ci);
|
int ci_cj0 = CJ0_FROM_CI(ci);
|
||||||
#if CLUSTER_M > CLUSTER_N
|
#if CLUSTER_M > CLUSTER_N
|
||||||
@@ -595,7 +595,7 @@ double computeForceLJ_4xn_half(Parameter *param, Atom *atom, Neighbor *neighbor,
|
|||||||
{
|
{
|
||||||
LIKWID_MARKER_START("force");
|
LIKWID_MARKER_START("force");
|
||||||
|
|
||||||
#pragma omp for
|
#pragma omp for schedule(runtime)
|
||||||
for(int ci = 0; ci < atom->Nclusters_local; ci++) {
|
for(int ci = 0; ci < atom->Nclusters_local; ci++) {
|
||||||
int ci_cj0 = CJ0_FROM_CI(ci);
|
int ci_cj0 = CJ0_FROM_CI(ci);
|
||||||
#if CLUSTER_M > CLUSTER_N
|
#if CLUSTER_M > CLUSTER_N
|
||||||
@@ -869,7 +869,7 @@ double computeForceLJ_4xn_full(Parameter *param, Atom *atom, Neighbor *neighbor,
|
|||||||
{
|
{
|
||||||
LIKWID_MARKER_START("force");
|
LIKWID_MARKER_START("force");
|
||||||
|
|
||||||
#pragma omp for
|
#pragma omp for schedule(runtime)
|
||||||
for(int ci = 0; ci < atom->Nclusters_local; ci++) {
|
for(int ci = 0; ci < atom->Nclusters_local; ci++) {
|
||||||
int ci_cj0 = CJ0_FROM_CI(ci);
|
int ci_cj0 = CJ0_FROM_CI(ci);
|
||||||
#if CLUSTER_M > CLUSTER_N
|
#if CLUSTER_M > CLUSTER_N
|
||||||
|
@@ -5,7 +5,9 @@
|
|||||||
* license that can be found in the LICENSE file.
|
* license that can be found in the LICENSE file.
|
||||||
*/
|
*/
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
|
#include <string.h>
|
||||||
#include <math.h>
|
#include <math.h>
|
||||||
|
#include <omp.h>
|
||||||
//--
|
//--
|
||||||
#include <likwid-marker.h>
|
#include <likwid-marker.h>
|
||||||
//--
|
//--
|
||||||
@@ -308,6 +310,30 @@ int main(int argc, char** argv) {
|
|||||||
printf("TOTAL %.2fs FORCE %.2fs NEIGH %.2fs REST %.2fs\n",
|
printf("TOTAL %.2fs FORCE %.2fs NEIGH %.2fs REST %.2fs\n",
|
||||||
timer[TOTAL], timer[FORCE], timer[NEIGH], timer[TOTAL]-timer[FORCE]-timer[NEIGH]);
|
timer[TOTAL], timer[FORCE], timer[NEIGH], timer[TOTAL]-timer[FORCE]-timer[NEIGH]);
|
||||||
printf(HLINE);
|
printf(HLINE);
|
||||||
|
|
||||||
|
int nthreads = 0;
|
||||||
|
int chunkSize = 0;
|
||||||
|
omp_sched_t schedKind;
|
||||||
|
char schedType[10];
|
||||||
|
#pragma omp parallel
|
||||||
|
#pragma omp master
|
||||||
|
{
|
||||||
|
omp_get_schedule(&schedKind, &chunkSize);
|
||||||
|
|
||||||
|
switch (schedKind)
|
||||||
|
{
|
||||||
|
case omp_sched_static: strcpy(schedType, "static"); break;
|
||||||
|
case omp_sched_dynamic: strcpy(schedType, "dynamic"); break;
|
||||||
|
case omp_sched_guided: strcpy(schedType, "guided"); break;
|
||||||
|
case omp_sched_auto: strcpy(schedType, "auto"); break;
|
||||||
|
}
|
||||||
|
|
||||||
|
nthreads = omp_get_max_threads();
|
||||||
|
}
|
||||||
|
|
||||||
|
printf("Num threads: %d\n", nthreads);
|
||||||
|
printf("Schedule: (%s,%d)\n", schedType, chunkSize);
|
||||||
|
|
||||||
printf("Performance: %.2f million atom updates per second\n",
|
printf("Performance: %.2f million atom updates per second\n",
|
||||||
1e-6 * (double) atom.Natoms * param.ntimes / timer[TOTAL]);
|
1e-6 * (double) atom.Natoms * param.ntimes / timer[TOTAL]);
|
||||||
#ifdef COMPUTE_STATS
|
#ifdef COMPUTE_STATS
|
||||||
|
@@ -1,7 +1,7 @@
|
|||||||
CC = icc
|
CC = icc
|
||||||
LINKER = $(CC)
|
LINKER = $(CC)
|
||||||
|
|
||||||
OPENMP = #-qopenmp
|
OPENMP = -qopenmp
|
||||||
PROFILE = #-profile-functions -g -pg
|
PROFILE = #-profile-functions -g -pg
|
||||||
|
|
||||||
ifeq ($(ISA),AVX512)
|
ifeq ($(ISA),AVX512)
|
||||||
|
@@ -41,7 +41,7 @@ double computeForceLJFullNeigh_plain_c(Parameter *param, Atom *atom, Neighbor *n
|
|||||||
{
|
{
|
||||||
LIKWID_MARKER_START("force");
|
LIKWID_MARKER_START("force");
|
||||||
|
|
||||||
#pragma omp for
|
#pragma omp for schedule(runtime)
|
||||||
for(int i = 0; i < Nlocal; i++) {
|
for(int i = 0; i < Nlocal; i++) {
|
||||||
neighs = &neighbor->neighbors[i * neighbor->maxneighs];
|
neighs = &neighbor->neighbors[i * neighbor->maxneighs];
|
||||||
int numneighs = neighbor->numneigh[i];
|
int numneighs = neighbor->numneigh[i];
|
||||||
@@ -131,7 +131,7 @@ double computeForceLJHalfNeigh(Parameter *param, Atom *atom, Neighbor *neighbor,
|
|||||||
{
|
{
|
||||||
LIKWID_MARKER_START("forceLJ-halfneigh");
|
LIKWID_MARKER_START("forceLJ-halfneigh");
|
||||||
|
|
||||||
#pragma omp for
|
#pragma omp for schedule(runtime)
|
||||||
for(int i = 0; i < Nlocal; i++) {
|
for(int i = 0; i < Nlocal; i++) {
|
||||||
neighs = &neighbor->neighbors[i * neighbor->maxneighs];
|
neighs = &neighbor->neighbors[i * neighbor->maxneighs];
|
||||||
int numneighs = neighbor->numneigh[i];
|
int numneighs = neighbor->numneigh[i];
|
||||||
@@ -227,7 +227,7 @@ double computeForceLJFullNeigh_simd(Parameter *param, Atom *atom, Neighbor *neig
|
|||||||
{
|
{
|
||||||
LIKWID_MARKER_START("force");
|
LIKWID_MARKER_START("force");
|
||||||
|
|
||||||
#pragma omp for
|
#pragma omp for schedule(runtime)
|
||||||
for(int i = 0; i < Nlocal; i++) {
|
for(int i = 0; i < Nlocal; i++) {
|
||||||
neighs = &neighbor->neighbors[i * neighbor->maxneighs];
|
neighs = &neighbor->neighbors[i * neighbor->maxneighs];
|
||||||
int numneighs = neighbor->numneigh[i];
|
int numneighs = neighbor->numneigh[i];
|
||||||
|
@@ -11,6 +11,7 @@
|
|||||||
#include <limits.h>
|
#include <limits.h>
|
||||||
#include <math.h>
|
#include <math.h>
|
||||||
#include <float.h>
|
#include <float.h>
|
||||||
|
#include <omp.h>
|
||||||
|
|
||||||
#include <likwid-marker.h>
|
#include <likwid-marker.h>
|
||||||
|
|
||||||
@@ -292,6 +293,30 @@ int main(int argc, char** argv) {
|
|||||||
printf("TOTAL %.2fs FORCE %.2fs NEIGH %.2fs REST %.2fs\n",
|
printf("TOTAL %.2fs FORCE %.2fs NEIGH %.2fs REST %.2fs\n",
|
||||||
timer[TOTAL], timer[FORCE], timer[NEIGH], timer[TOTAL]-timer[FORCE]-timer[NEIGH]);
|
timer[TOTAL], timer[FORCE], timer[NEIGH], timer[TOTAL]-timer[FORCE]-timer[NEIGH]);
|
||||||
printf(HLINE);
|
printf(HLINE);
|
||||||
|
|
||||||
|
int nthreads = 0;
|
||||||
|
int chunkSize = 0;
|
||||||
|
omp_sched_t schedKind;
|
||||||
|
char schedType[10];
|
||||||
|
#pragma omp parallel
|
||||||
|
#pragma omp master
|
||||||
|
{
|
||||||
|
omp_get_schedule(&schedKind, &chunkSize);
|
||||||
|
|
||||||
|
switch (schedKind)
|
||||||
|
{
|
||||||
|
case omp_sched_static: strcpy(schedType, "static"); break;
|
||||||
|
case omp_sched_dynamic: strcpy(schedType, "dynamic"); break;
|
||||||
|
case omp_sched_guided: strcpy(schedType, "guided"); break;
|
||||||
|
case omp_sched_auto: strcpy(schedType, "auto"); break;
|
||||||
|
}
|
||||||
|
|
||||||
|
nthreads = omp_get_max_threads();
|
||||||
|
}
|
||||||
|
|
||||||
|
printf("Num threads: %d\n", nthreads);
|
||||||
|
printf("Schedule: (%s,%d)\n", schedType, chunkSize);
|
||||||
|
|
||||||
printf("Performance: %.2f million atom updates per second\n",
|
printf("Performance: %.2f million atom updates per second\n",
|
||||||
1e-6 * (double) atom.Natoms * param.ntimes / timer[TOTAL]);
|
1e-6 * (double) atom.Natoms * param.ntimes / timer[TOTAL]);
|
||||||
#ifdef COMPUTE_STATS
|
#ifdef COMPUTE_STATS
|
||||||
|
Reference in New Issue
Block a user