From 9fbb1ce57215d5fb9e028c449774813e50d37d10 Mon Sep 17 00:00:00 2001 From: Ansgar Esztermann Date: Wed, 28 Apr 2010 16:36:41 +0200 Subject: [PATCH] Implement a Morton curve in gather_f_bsplines() to improve cache locality In gather_f_bsplines(), copy the original row-major FFT grid to a different array organized along a space filling (fractal) curve. This improves locality, thereby reducing the number of L2 cache misses. --- src/mdlib/pme.c | 94 ++++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 files changed, 93 insertions(+), 1 deletions(-) diff --git a/src/mdlib/pme.c b/src/mdlib/pme.c index cd7aca4..5e6009c 100644 --- a/src/mdlib/pme.c +++ b/src/mdlib/pme.c @@ -97,6 +97,8 @@ /* #define TAKETIME (step > 1 && timesteps < 10) */ #define TAKETIME FALSE +#define PME_GRID_MORTON + #ifdef GMX_DOUBLE #define mpi_type MPI_DOUBLE #else @@ -174,6 +176,10 @@ typedef struct gmx_pme { t_fftgrid *gridA,*gridB; int *nnx,*nny,*nnz; +#ifdef PME_GRID_MORTON + real *morton; +#endif + pme_atomcomm_t atc[2]; matrix recipbox; splinevec bsp_mod; @@ -1157,6 +1163,31 @@ real solve_pme(gmx_pme_t pme,t_fftgrid *grid, return(0.5*energy); } +#ifdef PME_GRID_MORTON +#define DO_FSPLINE(order) \ +for(ithx=0; (ithxmorton; + to_morton(morton, ptr, nx, ny, nz, la12, la2); +#endif + order = pme->pme_order; thx = atc->theta[XX]; thy = atc->theta[YY]; @@ -1678,11 +1748,17 @@ int gmx_pme_init(gmx_pme_t *pmedata,t_commrec *cr,int nnodes_major, pme_atomcomm_t *atc; int nminor,b,d,i,lbnd,rbnd,maxlr; +#ifdef PME_GRID_MORTON + int morton_size; +#endif if (debug) fprintf(debug,"Creating PME data structures.\n"); snew(pme,1); - + +#ifdef PME_GRID_MORTON + pme->morton = NULL; +#endif pme->redist_init = FALSE; pme->sum_qgrid_tmp = NULL; @@ -1807,6 +1883,22 @@ int gmx_pme_init(gmx_pme_t *pmedata,t_commrec *cr,int nnodes_major, snew(pme->bsp_mod[YY],pme->nky); snew(pme->bsp_mod[ZZ],pme->nkz); +#ifdef PME_GRID_MORTON + morton_size = max(pme->nkx, pme->nky); + morton_size = max(morton_size, pme->nkz); + + /* round up to next power of two */ + morton_size--; + morton_size |= morton_size >> 1; + morton_size |= morton_size >> 2; + morton_size |= morton_size >> 4; + morton_size |= morton_size >> 8; + morton_size |= morton_size >> 16; + morton_size++; + + snew(pme->morton, morton_size * morton_size * morton_size); +#endif + pme->gridA = mk_fftgrid(pme->nkx,pme->nky,pme->nkz, NULL,pme->overlap[0].s2g,cr, bReproducible); -- 1.6.3.3