[Commits] [svn:einsteintoolkit] incoming/MemSpeed/ (Rev. 88)
schnetter at cct.lsu.edu
schnetter at cct.lsu.edu
Fri Jun 21 20:07:31 CDT 2013
User: eschnett
Date: 2013/06/21 08:07 PM
Added:
/MemSpeed/
README, configuration.ccl, interface.ccl, param.ccl, schedule.ccl
/MemSpeed/doc/
documentation.tex
/MemSpeed/par/
/MemSpeed/src/
make.code.defn, memspeed.cc
/MemSpeed/test/
Log:
New thorn MemSpeed
File Changes:
Directory: /MemSpeed/
=====================
File [added]: README
Delta lines: +9 -0
===================================================================
--- MemSpeed/README (rev 0)
+++ MemSpeed/README 2013-06-22 01:07:31 UTC (rev 88)
@@ -0,0 +1,9 @@
+Cactus Code Thorn MemSpeed
+Author(s) : Erik Schnetter <schnetter at gmail.com>
+Maintainer(s): Erik Schnetter <schnetter at gmail.com>
+Licence : n/a
+--------------------------------------------------------------------------
+
+1. Purpose
+
+Determine the latencies and bandwidths of caches and main memory.
File [added]: configuration.ccl
Delta lines: +3 -0
===================================================================
--- MemSpeed/configuration.ccl (rev 0)
+++ MemSpeed/configuration.ccl 2013-06-22 01:07:31 UTC (rev 88)
@@ -0,0 +1,3 @@
+# Configuration definitions for thorn MemSpeed
+
+REQUIRES Vectors
File [added]: interface.ccl
Delta lines: +17 -0
===================================================================
--- MemSpeed/interface.ccl (rev 0)
+++ MemSpeed/interface.ccl 2013-06-22 01:07:31 UTC (rev 88)
@@ -0,0 +1,17 @@
+# Interface definition for thorn MemSpeed
+
+IMPLEMENTS: MemSpeed
+
+USES INCLUDE HEADER: vectors.h
+
+
+
+# Aliased function that this thorn requires; it describes the levels of
+# the cache/memory hierarchy.  As used in src/memspeed.cc: a call with
+# null arrays and max_num_cache_levels=0 returns the number of levels,
+# and a second call with arrays of that length fills in one entry per
+# level.  The entries of "sizes" are pointer-typed; memspeed.cc converts
+# them to ptrdiff_t.
+CCTK_INT FUNCTION GetCacheInfo1 \
+ (CCTK_POINTER_TO_CONST ARRAY OUT names, \
+ CCTK_INT ARRAY OUT types, \
+ CCTK_POINTER_TO_CONST ARRAY OUT sizes, \
+ CCTK_INT ARRAY OUT linesizes, \
+ CCTK_INT ARRAY OUT strides, \
+ CCTK_INT ARRAY OUT num_puss, \
+ CCTK_INT IN max_num_cache_levels)
+REQUIRES FUNCTION GetCacheInfo1
File [added]: param.ccl
Delta lines: +5 -0
===================================================================
--- MemSpeed/param.ccl (rev 0)
+++ MemSpeed/param.ccl 2013-06-22 01:07:31 UTC (rev 88)
@@ -0,0 +1,5 @@
+# Parameter definitions for thorn MemSpeed
+
+BOOLEAN verbose "Verbose output" STEERABLE=always
+{
+} "no"
File [added]: schedule.ccl
Delta lines: +7 -0
===================================================================
--- MemSpeed/schedule.ccl (rev 0)
+++ MemSpeed/schedule.ccl 2013-06-22 01:07:31 UTC (rev 88)
@@ -0,0 +1,7 @@
+# Schedule definitions for thorn MemSpeed
+
+SCHEDULE MemSpeed_MeasureSpeed AT wragh
+{
+ LANG: C
+ OPTIONS: meta
+} "Measure memory and cache speeds"
Directory: /MemSpeed/doc/
=========================
File [added]: documentation.tex
Delta lines: +144 -0
===================================================================
--- MemSpeed/doc/documentation.tex (rev 0)
+++ MemSpeed/doc/documentation.tex 2013-06-22 01:07:31 UTC (rev 88)
@@ -0,0 +1,144 @@
+% *======================================================================*
+% Cactus Thorn template for ThornGuide documentation
+% Author: Ian Kelley
+% Date: Sun Jun 02, 2002
+% $Header$
+%
+% Thorn documentation in the latex file doc/documentation.tex
+% will be included in ThornGuides built with the Cactus make system.
+% The scripts employed by the make system automatically include
+% pages about variables, parameters and scheduling parsed from the
+% relevant thorn CCL files.
+%
+% This template contains guidelines which help to assure that your
+% documentation will be correctly added to ThornGuides. More
+% information is available in the Cactus UsersGuide.
+%
+% Guidelines:
+% - Do not change anything before the line
+% "% START CACTUS THORNGUIDE",
+% except for filling in the title, author, date, etc. fields.
+% - Each of these fields should only be on ONE line.
+% - Author names should be separated with a \\ or a comma.
+% - You can define your own macros, but they must appear after
+% the START CACTUS THORNGUIDE line, and must not redefine standard
+% latex commands.
+% - To avoid name clashes with other thorns, 'labels', 'citations',
+% 'references', and 'image' names should conform to the following
+% convention:
+% ARRANGEMENT_THORN_LABEL
+% For example, an image wave.eps in the arrangement CactusWave and
+% thorn WaveToyC should be renamed to CactusWave_WaveToyC_wave.eps
+% - Graphics should only be included using the graphicx package.
+% More specifically, with the "\includegraphics" command. Do
+% not specify any graphic file extensions in your .tex file. This
+% will allow us to create a PDF version of the ThornGuide
+% via pdflatex.
+% - References should be included with the latex "\bibitem" command.
+% - Use \begin{abstract}...\end{abstract} instead of \abstract{...}
+% - Do not use \appendix, instead include any appendices you need as
+% standard sections.
+% - For the benefit of our Perl scripts, and for future extensions,
+% please use simple latex.
+%
+% *======================================================================*
+%
+% Example of including a graphic image:
+% \begin{figure}[ht]
+% \begin{center}
+% \includegraphics[width=6cm]{MyArrangement_MyThorn_MyFigure}
+% \end{center}
+% \caption{Illustration of this and that}
+% \label{MyArrangement_MyThorn_MyLabel}
+% \end{figure}
+%
+% Example of using a label:
+% \label{MyArrangement_MyThorn_MyLabel}
+%
+% Example of a citation:
+% \cite{MyArrangement_MyThorn_Author99}
+%
+% Example of including a reference
+% \bibitem{MyArrangement_MyThorn_Author99}
+% {J. Author, {\em The Title of the Book, Journal, or periodical}, 1 (1999),
+% 1--16. {\tt http://www.nowhere.com/}}
+%
+% *======================================================================*
+
+% If you are using CVS use this line to give version information
+% $Header$
+
+\documentclass{article}
+
+% Use the Cactus ThornGuide style file
+% (Automatically used from Cactus distribution, if you have a
+% thorn without the Cactus Flesh download this from the Cactus
+% homepage at www.cactuscode.org)
+\usepackage{../../../../doc/latex/cactus}
+
+\begin{document}
+
+% The author of the documentation
+\author{Erik Schnetter \textless schnetter at gmail.com\textgreater}
+
+% The title of the document (not necessarily the name of the Thorn)
+\title{MemSpeed}
+
+% the date your document was last changed, if your document is in CVS,
+% please use:
+% \date{$ $Date: 2004-01-07 14:12:39 -0600 (Wed, 07 Jan 2004) $ $}
+\date{June 17 2013}
+
+\maketitle
+
+% Do not delete next line
+% START CACTUS THORNGUIDE
+
+% Add all definitions used in this documentation here
+% \def\mydef etc
+
+% Add an abstract for this thorn's documentation
+\begin{abstract}
+
+\end{abstract}
+
+% The following sections are suggestive only.
+% Remove them or add your own.
+
+\section{Introduction}
+
+\section{Physical System}
+
+\section{Numerical Implementation}
+
+\section{Using This Thorn}
+
+\subsection{Obtaining This Thorn}
+
+\subsection{Basic Usage}
+
+\subsection{Special Behaviour}
+
+\subsection{Interaction With Other Thorns}
+
+\subsection{Examples}
+
+\subsection{Support and Feedback}
+
+\section{History}
+
+\subsection{Thorn Source Code}
+
+\subsection{Thorn Documentation}
+
+\subsection{Acknowledgements}
+
+
+\begin{thebibliography}{9}
+
+\end{thebibliography}
+
+% Do not delete next line
+% END CACTUS THORNGUIDE
+
+\end{document}
Directory: /MemSpeed/src/
=========================
File [added]: make.code.defn
Delta lines: +7 -0
===================================================================
--- MemSpeed/src/make.code.defn (rev 0)
+++ MemSpeed/src/make.code.defn 2013-06-22 01:07:31 UTC (rev 88)
@@ -0,0 +1,7 @@
+# Main make.code.defn file for thorn MemSpeed
+
+# Source files in this directory
+SRCS = memspeed.cc
+
+# Subdirectories containing source files
+SUBDIRS =
File [added]: memspeed.cc
Delta lines: +483 -0
===================================================================
--- MemSpeed/src/memspeed.cc (rev 0)
+++ MemSpeed/src/memspeed.cc 2013-06-22 01:07:31 UTC (rev 88)
@@ -0,0 +1,483 @@
+#include <cctk.h>
+#include <cctk_Arguments.h>
+#include <cctk_Parameters.h>
+
+#include <vectors.h>
+
+#include <cassert>
+#include <cstdio>
+#include <cstring>
+#include <string>
+#include <vector>
+
+using namespace std;
+
+
+
+#ifdef _OPENMP
+# include <omp.h>
+#else
+# include <sys/time.h>
+namespace {
+  // Replacement for OpenMP's omp_get_wtime() in builds without OpenMP:
+  // returns the time reported by gettimeofday() as seconds in a double.
+  double omp_get_wtime()
+  {
+    timeval now;
+    gettimeofday(&now, NULL);
+    const double whole_seconds = now.tv_sec;
+    const double micro_seconds = now.tv_usec;
+    return whole_seconds + 1.0e-6 * micro_seconds;
+  }
+}
+#endif
+
+
+
+namespace {
+
+ // Arithmetic throughput measured for one processing unit (PU).
+ struct cpu_info_t {
+ double flop_speed; // floating-point operations per second
+ double iop_speed; // integer operations per second
+ };
+ // Filled in by measure_cpu_flop_speed() and measure_cpu_iop_speed().
+ cpu_info_t cpu_info;
+
+ // Description of one level of the memory hierarchy (a cache or a
+ // memory) together with the speeds measured for it.
+ struct cache_info_t {
+ // Properties copied from GetCacheInfo1() by load_cache_info()
+ string name;
+ int type; // calc_sizes() treats type 1 as memory, anything else as cache
+ ptrdiff_t size; // used as a byte count when sizing the test arrays
+ int linesize;
+ int stride;
+ int num_pus; // printed as "for %d PUs" next to the bandwidths
+
+ // Results stored by the measure_*() routines
+ double read_latency; // seconds per dependent load
+ double read_bandwidth; // bytes per second
+ double write_latency; // seconds per store
+ double write_bandwidth; // bytes per second
+ };
+ // One entry per level, in the order returned by GetCacheInfo1()
+ vector<cache_info_t> cache_info;
+
+
+
+ // Query the memory hierarchy through the aliased function
+ // GetCacheInfo1() and store one cache_info_t per reported level in
+ // cache_info.
+ //
+ // GetCacheInfo1() is called twice: first with null arrays and a
+ // capacity of zero to learn the number of levels, then with arrays of
+ // that length to retrieve the per-level data.
+ void load_cache_info()
+ {
+   const int num_cache_levels = GetCacheInfo1(0, 0, 0, 0, 0, 0, 0);
+   if (num_cache_levels <= 0) {
+     // No levels were reported.  Taking &v[0] of an empty vector is
+     // undefined behaviour (and a negative count cannot size a vector),
+     // so skip the second call.  The measurement loops iterate over
+     // cache_info and therefore simply do nothing.
+     cache_info.clear();
+     return;
+   }
+
+   // Output buffers, one slot per level
+   vector<CCTK_POINTER_TO_CONST> names_(num_cache_levels);
+   vector<CCTK_INT> types_(num_cache_levels);
+   vector<CCTK_POINTER_TO_CONST> sizes_(num_cache_levels);
+   vector<CCTK_INT> linesizes_(num_cache_levels);
+   vector<CCTK_INT> strides_(num_cache_levels);
+   vector<CCTK_INT> num_puss_(num_cache_levels);
+   GetCacheInfo1(&names_[0], &types_[0],
+                 &sizes_[0], &linesizes_[0], &strides_[0], &num_puss_[0],
+                 num_cache_levels);
+
+   cache_info.resize(num_cache_levels);
+   for (int n=0; n<num_cache_levels; ++n) {
+     // NOTE(review): assumes every returned name is a non-null C string
+     cache_info[n].name = (const char*)(names_[n]);
+     cache_info[n].type = types_[n];
+     // The size is transported in a pointer-typed slot
+     cache_info[n].size = ptrdiff_t(sizes_[n]);
+     cache_info[n].linesize = linesizes_[n];
+     cache_info[n].stride = strides_[n];
+     cache_info[n].num_pus = num_puss_[n];
+   }
+ }
+
+
+
+ // Measure floating-point throughput and store it, in operations per
+ // second, in cpu_info.flop_speed.
+ //
+ // The kernel applies kmadd() (from vectors.h) to eight independent
+ // vector accumulators.  It is re-run with a growing iteration count
+ // until a single run takes at least min_elapsed seconds; only that
+ // last run enters the result.
+ void measure_cpu_flop_speed()
+ {
+ DECLARE_CCTK_PARAMETERS;
+
+ printf(" CPU floating point performance:");
+ if (verbose) {
+ printf("\n");
+ }
+ double min_elapsed = 1.0;
+ ptrdiff_t max_count = 1000000;
+ double elapsed = 0.0;
+ for (;;) {
+ if (verbose) {
+ printf(" iterations=%td...", max_count);
+ fflush(stdout);
+ }
+ const double t0 = omp_get_wtime();
+ CCTK_REAL_VEC s0, s1, s2, s3, s4, s5, s6, s7;
+ s0 = s1 = s2 = s3 = s4 = s5 = s6 = s7 = vec_set1(1.0);
+ for (ptrdiff_t count=0; count<max_count; ++count) {
+ s0 = kmadd(vec_set1(1.1), s0, vec_set1(-0.1));
+ s1 = kmadd(vec_set1(1.1), s1, vec_set1(-0.1));
+ s2 = kmadd(vec_set1(1.1), s2, vec_set1(-0.1));
+ s3 = kmadd(vec_set1(1.1), s3, vec_set1(-0.1));
+ s4 = kmadd(vec_set1(1.1), s4, vec_set1(-0.1));
+ s5 = kmadd(vec_set1(1.1), s5, vec_set1(-0.1));
+ s6 = kmadd(vec_set1(1.1), s6, vec_set1(-0.1));
+ s7 = kmadd(vec_set1(1.1), s7, vec_set1(-0.1));
+ }
+ // Combine all accumulators into a volatile object; presumably this
+ // keeps the compiler from discarding the timed loop.
+ volatile CCTK_REAL_VEC use_s CCTK_ATTRIBUTE_UNUSED =
+ kadd(kadd(kadd(s0, s1), kadd(s2, s3)),
+ kadd(kadd(s4, s5), kadd(s6, s7)));
+ const double t1 = omp_get_wtime();
+ elapsed = t1 - t0;
+ if (verbose) {
+ printf(" time=%g sec\n", elapsed);
+ }
+ if (elapsed >= min_elapsed) break;
+ // Grow the count by a factor between 2 and 10, aiming for a run
+ // time of 1.1 * min_elapsed
+ max_count *= llrint(max(2.0, min(10.0, 1.1 * min_elapsed / elapsed)));
+ }
+ // 8 kmadd calls per iteration, CCTK_REAL_VEC_SIZE elements each,
+ // counted as 2 operations per element (presumably multiply and add)
+ cpu_info.flop_speed = max_count * 8 * CCTK_REAL_VEC_SIZE * 2 / elapsed;
+ if (verbose) {
+ printf(" result:");
+ }
+ printf(" %g Gflop/sec for each PU\n", cpu_info.flop_speed / 1.0e+9);
+ }
+
+
+
+ // Measure integer throughput and store it, in operations per second,
+ // in cpu_info.iop_speed.
+ //
+ // The kernel runs eight independent chains of address computations.
+ // It is re-run with a growing iteration count until a single run takes
+ // at least min_elapsed seconds; only that last run enters the result.
+ void measure_cpu_iop_speed()
+ {
+ DECLARE_CCTK_PARAMETERS;
+
+ printf(" CPU integer performance:");
+ if (verbose) {
+ printf("\n");
+ }
+ double min_elapsed = 1.0;
+ ptrdiff_t max_count = 1000000;
+ double elapsed = 0.0;
+ for (;;) {
+ if (verbose) {
+ printf(" iterations=%td...", max_count);
+ fflush(stdout);
+ }
+ const double t0 = omp_get_wtime();
+ vector<CCTK_REAL> base(1000);
+ ptrdiff_t s0, s1, s2, s3, s4, s5, s6, s7;
+ s0 = s1 = s2 = s3 = s4 = s5 = s6 = s7 = 0;
+ for (ptrdiff_t count=0; count<max_count; ++count) {
+ // Each statement scales the previous value and forms the address
+ // of the element with that index; the element itself is never
+ // read.
+ // NOTE(review): after the first iteration the indices are
+ // addresses, i.e. far outside the 1000 elements of base; this
+ // relies on operator[] being unchecked.
+ s0 = ptrdiff_t(&base[ s0]);
+ s1 = ptrdiff_t(&base[2*s1]);
+ s2 = ptrdiff_t(&base[3*s2]);
+ s3 = ptrdiff_t(&base[4*s3]);
+ s4 = ptrdiff_t(&base[5*s4]);
+ s5 = ptrdiff_t(&base[6*s5]);
+ s6 = ptrdiff_t(&base[7*s6]);
+ s7 = ptrdiff_t(&base[8*s7]);
+ }
+ // Combine all chains into a volatile object; presumably this keeps
+ // the compiler from discarding the timed loop.
+ volatile ptrdiff_t use_s CCTK_ATTRIBUTE_UNUSED =
+ s0 + s1 + s2 + s3 + s4 + s5 + s6 + s7;
+ const double t1 = omp_get_wtime();
+ elapsed = t1 - t0;
+ if (verbose) {
+ printf(" time=%g sec\n", elapsed);
+ }
+ if (elapsed >= min_elapsed) break;
+ // Grow the count by a factor between 2 and 10, aiming for a run
+ // time of 1.1 * min_elapsed
+ max_count *= llrint(max(2.0, min(10.0, 1.1 * min_elapsed / elapsed)));
+ }
+ // 8 statements per iteration, each counted as 2 integer operations
+ cpu_info.iop_speed = max_count * 8 * 2 / elapsed;
+ if (verbose) {
+ printf(" result:");
+ }
+ printf(" %g Giop/sec for each PU\n", cpu_info.iop_speed / 1.0e+9);
+ }
+
+
+
+ // Choose the working-set size for benchmarking level "cache" of
+ // cache_info.
+ //
+ //   skipsize  receives the size of the preceding level when both this
+ //             level and the preceding one are memories (type 1), and
+ //             zero otherwise
+ //   size      receives the number of bytes to benchmark with
+ void calc_sizes(int cache, ptrdiff_t& skipsize, ptrdiff_t& size)
+ {
+   const cache_info_t& level = cache_info[cache];
+
+   if (level.type != 1) {
+     // Cache: use three quarters of it
+     skipsize = 0;
+     size = level.size * 3 / 4;
+     return;
+   }
+
+   const bool follows_memory = cache>0 && cache_info[cache-1].type==1;
+   if (!follows_memory) {
+     // Local memory or only memory: use half of it
+     skipsize = 0;
+     size = level.size / 2;
+     return;
+   }
+
+   // Global memory, and there is also local memory: skip the local
+   // memory and use a quarter of what remains
+   skipsize = cache_info[cache-1].size;
+   size = (level.size - skipsize) / 4;
+   assert(size >= skipsize/4);
+ }
+
+
+
+ // Measure the read latency of every level in cache_info and store it,
+ // in seconds per load, in cache_info[...].read_latency.
+ //
+ // For each level an array of pointers of the size chosen by
+ // calc_sizes() is linked into a chain, and the time to follow that
+ // chain is measured.  Every load depends on the result of the previous
+ // one.  The chase is re-run with a growing iteration count until a
+ // single run takes at least min_elapsed seconds.
+ void measure_read_latency()
+ {
+ DECLARE_CCTK_PARAMETERS;
+
+ printf(" Read latency:\n");
+ for (int cache=0; cache<int(cache_info.size()); ++cache) {
+ ptrdiff_t skipsize, size;
+ calc_sizes(cache, skipsize, size);
+ assert(size>0);
+ // NOTE(review): step is only checked here, it is not used below
+ const ptrdiff_t step = cache_info[cache].linesize;
+ assert(step>0);
+ if (verbose) {
+ printf(" %s read latency (using %td bytes):\n",
+ cache_info[cache].name.c_str(), size);
+ fflush(stdout);
+ } else {
+ printf(" %s read latency:", cache_info[cache].name.c_str());
+ }
+ // Allocated and filled but not accessed afterwards; presumably it
+ // occupies the smaller memory level that is to be skipped -- TODO
+ // confirm
+ vector<char> skiparray(skipsize, 1);
+ const ptrdiff_t offset = 0xa1d2d5ff; // a random number
+ const ptrdiff_t nmax = size / sizeof(void*);
+ vector<void*> array(nmax);
+ {
+ // Link the entries into a chain: entry i points to entry
+ // (i+offset) % nmax.  The asserts check that no entry is linked
+ // twice and that the chain ends where it started.
+ // NOTE(review): the ++next_i adjustment is taken only when
+ // array[i] is already set, which the assert on the following line
+ // forbids; the construction therefore seems to rely on stepping
+ // by offset visiting every entry exactly once -- confirm.
+ ptrdiff_t i = 0;
+ for (ptrdiff_t n=0; n<nmax; ++n) {
+ ptrdiff_t next_i = (i+offset) % nmax;
+ if (array[i] && n != nmax-1) ++next_i;
+ assert(!array[i]);
+ array[i] = &array[next_i];
+ i = next_i;
+ }
+ assert(i == 0);
+ }
+ double min_elapsed = 1.0;
+ ptrdiff_t max_count = 1000;
+ double elapsed = 0.0;
+ for (;;) {
+ if (verbose) {
+ printf(" iterations=%td...", max_count);
+ fflush(stdout);
+ }
+ const double t0 = omp_get_wtime();
+ void* ptr = &array[0];
+ for (ptrdiff_t count=0; count<max_count; ++count) {
+ // 10 x 10 = 100 dependent loads per iteration
+#define REPEAT10(x) x x x x x x x x x x
+ REPEAT10(REPEAT10(ptr = *(void**)ptr;));
+#undef REPEAT10
+ }
+ // Store the final pointer in a volatile object; presumably this
+ // keeps the compiler from discarding the timed loop.
+ volatile bool use_ptr CCTK_ATTRIBUTE_UNUSED = ptr;
+ const double t1 = omp_get_wtime();
+ elapsed = t1 - t0;
+ if (verbose) {
+ printf(" time=%g sec\n", elapsed);
+ }
+ if (elapsed >= min_elapsed) break;
+ // Grow the count by a factor between 2 and 10, aiming for a run
+ // time of 1.1 * min_elapsed
+ max_count *= llrint(max(2.0, min(10.0, 1.1 * min_elapsed / elapsed)));
+ }
+ // Time per load: 100 loads in each of max_count iterations
+ cache_info[cache].read_latency = elapsed / (max_count * 100);
+ if (verbose) {
+ printf(" result:");
+ }
+ printf(" %g nsec\n", cache_info[cache].read_latency * 1.0e+9);
+ }
+ }
+
+
+
+ // Measure the read bandwidth of every level in cache_info and store
+ // it, in bytes per second, in cache_info[...].read_bandwidth.
+ //
+ // For each level an array of about the size chosen by calc_sizes() is
+ // swept with vector loads that feed eight independent accumulators.
+ // The sweep is repeated with a growing count until a single timed run
+ // takes at least min_elapsed seconds; only that last run enters the
+ // result.
+ void measure_read_bandwidth()
+ {
+   DECLARE_CCTK_PARAMETERS;
+
+   printf(" Read bandwidth:\n");
+   for (int cache=0; cache<int(cache_info.size()); ++cache) {
+     ptrdiff_t skipsize, size;
+     calc_sizes(cache, skipsize, size);
+     assert(size>0);
+
+     // One pass of the unrolled loop below consumes 16 vectors.  Round
+     // the element count up to a whole number of passes so that the
+     // last pass cannot load beyond the end of the array.
+     const ptrdiff_t dn = CCTK_REAL_VEC_SIZE;
+     const ptrdiff_t chunk = 16 * dn;
+     const ptrdiff_t nmax =
+       (size / ptrdiff_t(sizeof(CCTK_REAL)) + chunk - 1) / chunk * chunk;
+     // Number of bytes that one sweep actually reads
+     const ptrdiff_t nbytes = nmax * ptrdiff_t(sizeof(CCTK_REAL));
+
+     if (verbose) {
+       printf(" %s read bandwidth (using %td bytes):\n",
+              cache_info[cache].name.c_str(), nbytes);
+       fflush(stdout);
+     } else {
+       printf(" %s read bandwidth:", cache_info[cache].name.c_str());
+     }
+     // Allocated and filled but not accessed afterwards; presumably it
+     // occupies the smaller memory level that is to be skipped -- TODO
+     // confirm
+     vector<char> skiparray(skipsize, 1);
+     // Over-allocate by one vector minus one element, then round the
+     // address of element CCTK_REAL_VEC_SIZE-1 down to a multiple of
+     // sizeof(CCTK_REAL_VEC); nmax elements starting there still lie
+     // inside raw_array.
+     vector<CCTK_REAL> raw_array(nmax + CCTK_REAL_VEC_SIZE-1, 1.0);
+     CCTK_REAL* restrict array = &raw_array[CCTK_REAL_VEC_SIZE-1];
+     array = (CCTK_REAL*)(ptrdiff_t(array) & -sizeof(CCTK_REAL_VEC));
+     double min_elapsed = 1.0;
+     ptrdiff_t max_count = 1;
+     double elapsed = 0.0;
+     for (;;) {
+       if (verbose) {
+         printf(" iterations=%td...", max_count);
+         fflush(stdout);
+       }
+       const double t0 = omp_get_wtime();
+       for (ptrdiff_t count=0; count<max_count; ++count) {
+         CCTK_REAL_VEC s0, s1, s2, s3, s4, s5, s6, s7;
+         s0 = s1 = s2 = s3 = s4 = s5 = s6 = s7 = vec_set1(0.0);
+         // nmax is a multiple of chunk, so every pass stays in bounds
+         for (ptrdiff_t n=0; n<nmax;) {
+           s0 = kmadd(vec_load(array[n]), s0, vec_load(array[n+dn]));
+           n += 2*dn;
+           s1 = kmadd(vec_load(array[n]), s1, vec_load(array[n+dn]));
+           n += 2*dn;
+           s2 = kmadd(vec_load(array[n]), s2, vec_load(array[n+dn]));
+           n += 2*dn;
+           s3 = kmadd(vec_load(array[n]), s3, vec_load(array[n+dn]));
+           n += 2*dn;
+           s4 = kmadd(vec_load(array[n]), s4, vec_load(array[n+dn]));
+           n += 2*dn;
+           s5 = kmadd(vec_load(array[n]), s5, vec_load(array[n+dn]));
+           n += 2*dn;
+           s6 = kmadd(vec_load(array[n]), s6, vec_load(array[n+dn]));
+           n += 2*dn;
+           s7 = kmadd(vec_load(array[n]), s7, vec_load(array[n+dn]));
+           n += 2*dn;
+         }
+         // Combine all accumulators into a volatile object; presumably
+         // this keeps the compiler from discarding the loads.
+         volatile CCTK_REAL_VEC use_s CCTK_ATTRIBUTE_UNUSED =
+           kadd(kadd(kadd(s0, s1), kadd(s2, s3)),
+                kadd(kadd(s4, s5), kadd(s6, s7)));
+       }
+       const double t1 = omp_get_wtime();
+       elapsed = t1 - t0;
+       if (verbose) {
+         printf(" time=%g sec\n", elapsed);
+       }
+       if (elapsed >= min_elapsed) break;
+       // Grow the count by a factor between 2 and 10, aiming for a run
+       // time of 1.1 * min_elapsed
+       max_count *= llrint(max(2.0, min(10.0, 1.1 * min_elapsed / elapsed)));
+     }
+     // Bytes read per second; computed in floating point so that the
+     // product of sweep count and sweep size cannot overflow
+     cache_info[cache].read_bandwidth =
+       double(max_count) * double(nbytes) / elapsed;
+     if (verbose) {
+       printf(" result:");
+     }
+     printf(" %g GByte/sec for %d PUs\n",
+            cache_info[cache].read_bandwidth / 1.0e+9,
+            cache_info[cache].num_pus);
+   }
+ }
+
+
+
+ // Measure the write latency of every level in cache_info and store
+ // it, in seconds per store, in cache_info[...].write_latency.
+ //
+ // For each level single bytes are written to a char array at indices
+ // that advance by a large fixed offset and wrap around.  The kernel is
+ // re-run with a growing iteration count until a single run takes at
+ // least min_elapsed seconds; only that last run enters the result.
+ void measure_write_latency()
+ {
+   DECLARE_CCTK_PARAMETERS;
+
+   printf(" Write latency:\n");
+   for (int cache=0; cache<int(cache_info.size()); ++cache) {
+     ptrdiff_t skipsize, size;
+     calc_sizes(cache, skipsize, size);
+     assert(size>0);
+     // Round down to a power of two so that "& size_mask" wraps an
+     // index into the array
+     size = ptrdiff_t(1) << ilogb(double(size));
+     const ptrdiff_t size_mask = size - 1;
+     const ptrdiff_t offset = 0xa1d2d5ff; // a random number
+     assert(size>0);
+     if (verbose) {
+       printf(" %s write latency (using %td bytes):\n",
+              cache_info[cache].name.c_str(), size);
+       fflush(stdout);
+     } else {
+       printf(" %s write latency:", cache_info[cache].name.c_str());
+     }
+     // Allocated and filled but not accessed afterwards; presumably it
+     // occupies the smaller memory level that is to be skipped -- TODO
+     // confirm
+     vector<char> skiparray(skipsize, 1);
+     vector<char> array_(size, 1);
+     char* restrict array = &array_[0];
+     double min_elapsed = 1.0;
+     ptrdiff_t max_count = 1000;
+     double elapsed = 0.0;
+     for (;;) {
+       if (verbose) {
+         printf(" iterations=%td...", max_count);
+         fflush(stdout);
+       }
+       const double t0 = omp_get_wtime();
+       ptrdiff_t n = 0;
+       for (ptrdiff_t count=0; count<max_count; ++count) {
+         // 8 stores per iteration
+         array[n & size_mask] = 2;
+         n += offset;
+         array[n & size_mask] = 2;
+         n += offset;
+         array[n & size_mask] = 2;
+         n += offset;
+         array[n & size_mask] = 2;
+         n += offset;
+         array[n & size_mask] = 2;
+         n += offset;
+         array[n & size_mask] = 2;
+         n += offset;
+         array[n & size_mask] = 2;
+         n += offset;
+         array[n & size_mask] = 2;
+         n += offset;
+       }
+       // Read the array back into a volatile object; presumably this
+       // keeps the compiler from discarding the stores.
+       volatile char use_array CCTK_ATTRIBUTE_UNUSED = array[0];
+       const double t1 = omp_get_wtime();
+       elapsed = t1 - t0;
+       if (verbose) {
+         printf(" time=%g sec\n", elapsed);
+       }
+       // Leave the loop BEFORE growing max_count: the result below must
+       // be computed from the iteration count of the run that was
+       // actually timed.  (Growing it after the final run made the
+       // reported latency too small by the growth factor.)
+       if (elapsed >= min_elapsed) break;
+       // Grow the count by a factor between 2 and 10, aiming for a run
+       // time of 1.1 * min_elapsed
+       max_count *= llrint(max(2.0, min(10.0, 1.1 * min_elapsed / elapsed)));
+     }
+     // Time per store: 8 stores in each of max_count iterations
+     cache_info[cache].write_latency = elapsed / (max_count * 8);
+     if (verbose) {
+       printf(" result:");
+     }
+     printf(" %g nsec\n", cache_info[cache].write_latency * 1.0e+9);
+   }
+ }
+
+
+
+ // Measure the write bandwidth of every level in cache_info and store
+ // it, in bytes per second, in cache_info[...].write_bandwidth.
+ //
+ // For each level a char array of the size chosen by calc_sizes() is
+ // overwritten with memset().  The fill is repeated with a growing
+ // count until a single timed run takes at least min_elapsed seconds;
+ // only that last run enters the result.
+ void measure_write_bandwidth()
+ {
+ DECLARE_CCTK_PARAMETERS;
+
+ printf(" Write bandwidth:\n");
+ for (int cache=0; cache<int(cache_info.size()); ++cache) {
+ ptrdiff_t skipsize, size;
+ calc_sizes(cache, skipsize, size);
+ assert(size>0);
+ if (verbose) {
+ printf(" %s write bandwidth (using %td bytes):\n",
+ cache_info[cache].name.c_str(), size);
+ fflush(stdout);
+ } else {
+ printf(" %s write bandwidth:", cache_info[cache].name.c_str());
+ }
+ // Allocated and filled but not accessed afterwards; presumably it
+ // occupies the smaller memory level that is to be skipped -- TODO
+ // confirm
+ vector<char> skiparray(skipsize, 1);
+ vector<char> array(size, 1);
+ double min_elapsed = 1.0;
+ ptrdiff_t max_count = 1;
+ double elapsed = 0.0;
+ for (;;) {
+ if (verbose) {
+ printf(" iterations=%td...", max_count);
+ fflush(stdout);
+ }
+ const double t0 = omp_get_wtime();
+ for (ptrdiff_t count=0; count<max_count; ++count) {
+ // The fill value changes with every pass
+ memset(&array[0], count % 256, size);
+ // Read one byte back into a volatile object; presumably this
+ // keeps the compiler from discarding the memset.
+ volatile char use_array CCTK_ATTRIBUTE_UNUSED = array[count % size];
+ }
+ const double t1 = omp_get_wtime();
+ elapsed = t1 - t0;
+ if (verbose) {
+ printf(" time=%g sec\n", elapsed);
+ }
+ if (elapsed >= min_elapsed) break;
+ // Grow the count by a factor between 2 and 10, aiming for a run
+ // time of 1.1 * min_elapsed
+ max_count *= llrint(max(2.0, min(10.0, 1.1 * min_elapsed / elapsed)));
+ }
+ // Bytes written per second: max_count fills of size bytes each
+ cache_info[cache].write_bandwidth = max_count * size / elapsed;
+ if (verbose) {
+ printf(" result:");
+ }
+ printf(" %g GByte/sec for %d PUs\n",
+ cache_info[cache].write_bandwidth / 1.0e+9,
+ cache_info[cache].num_pus);
+ }
+ }
+
+}
+
+
+
+// Scheduled entry point of the thorn (see schedule.ccl).  Loads the
+// description of the memory hierarchy and then runs all CPU, latency
+// and bandwidth measurements, which print their results to stdout.
+extern "C"
+void MemSpeed_MeasureSpeed(CCTK_ARGUMENTS)
+{
+ DECLARE_CCTK_ARGUMENTS;
+
+ // Only the process for which CCTK_MyProc() returns 0 measures
+ if (CCTK_MyProc(cctkGH) != 0) return;
+
+ CCTK_INFO("Measuring CPU, cache, and memory speeds:");
+ // Must come first: the latency and bandwidth routines iterate over
+ // the cache_info entries filled in here
+ load_cache_info();
+ measure_cpu_flop_speed();
+ measure_cpu_iop_speed();
+ measure_read_latency();
+ measure_read_bandwidth();
+ measure_write_latency();
+ measure_write_bandwidth();
+}
More information about the Commits
mailing list