[Users] Issues with Carpet run stalling on RIT cluster
Yosef Zlochower
yosef at astro.rit.edu
Fri May 31 06:55:54 CDT 2013
Hi,
I was wondering if anyone else saw this issue, or if it's specific to
the particular cluster I am using.
The problem is that the runs stall after a few hours. Basically, two
processes (out of 10-12)
get killed due to a failed assertion within the MPI library (I'm using
Open MPI on a QLogic InfiniPath
IB network). From the backtrace, it looks like the recompose step is the
one initiating the MPI
call. Despite the name of the executable (cactus_lazevnehalem), this was
actually run on a Sandy Bridge processor.
The OS is CentOS 6, and I'm using icc version 13.1.1. The ET version is
Orsted.
I have seen this issue in many different runs. However, the backtrace and
assertion failure below came from
runs that used the "ParitySymmetry" thorn and the associated changes to
CarpetRegrid2. I have included
the patch to CarpetRegrid2 at the bottom.
n015:3.0.Assertion failure at ptl.c:200: nbytes == msglen
n018:3.0.Assertion failure at ptl.c:200: nbytes == msglen
Backtrace from rank 2 pid 27135:
1. /lib64/libc.so.6() [0x32a4832920]
2. /lib64/libc.so.6(gsignal+0x35) [0x32a48328a5]
3. /lib64/libc.so.6(abort+0x175) [0x32a4834085]
4. /usr/lib64/libpsm_infinipath.so.1(+0x17b6d) [0x7f577e6c6b6d]
5. /usr/lib64/libpsm_infinipath.so.1(psmi_handle_error+0x261)
[0x7f577e6c6dd1]
6. /usr/lib64/libpsm_infinipath.so.1(psmi_am_mq_handler_rtsmatch+0x17a)
[0x7f577e6c1f6a]
7. /usr/lib64/libpsm_infinipath.so.1(+0xa832) [0x7f577e6b9832]
8. /usr/lib64/libpsm_infinipath.so.1(+0xd90f) [0x7f577e6bc90f]
9. /usr/lib64/libpsm_infinipath.so.1(psmi_poll_internal+0x29)
[0x7f577e6df309]
a. /usr/lib64/libpsm_infinipath.so.1(psm_mq_ipeek+0xa5) [0x7f577e6dde05]
b. /usr/mpi/gcc/openmpi-1.4.3-qlc/lib64/openmpi/mca_mtl_psm.so(+0x15f4)
[0x7f577e9025f4]
c.
/usr/mpi/gcc/openmpi-1.4.3-qlc/lib64/libopen-pal.so.0(opal_progress+0x5a) [0x7f57815f30fa]
d. /usr/mpi/gcc/openmpi-1.4.3-qlc/lib64/libmpi.so.0(+0x35685)
[0x7f5782f8a685]
e. /usr/mpi/gcc/openmpi-1.4.3-qlc/lib64/libmpi.so.0(PMPI_Waitall+0xa3)
[0x7f5782fb6c73]
f. comm_state::step()
[./cactus_lazevnehalem(_ZN10comm_state4stepEv+0x4a2) [0x8826c2]]
10. dh::recompose(int, bool)
[./cactus_lazevnehalem(_ZN2dh9recomposeEib+0x218) [0x89d8a8]]
11. gh::recompose(int, bool)
[./cactus_lazevnehalem(_ZN2gh9recomposeEib+0x52) [0x8dc722]]
12. Carpet::Recompose(_cGH const*, int, bool)
[./cactus_lazevnehalem(_ZN6Carpet9RecomposeEPK4_cGHib+0xea) [0x7db6ea]]
diff --git a/Carpet/CarpetRegrid2/param.ccl b/Carpet/CarpetRegrid2/param.ccl
index c7327d2..4843abe 100644
--- a/Carpet/CarpetRegrid2/param.ccl
+++ b/Carpet/CarpetRegrid2/param.ccl
@@ -62,6 +62,11 @@ BOOLEAN symmetry_rotating180 "Ensure a 180 degree
rotating symmetry about the z
{
} no
+BOOLEAN symmetry_parity "parity "
+{
+} no
+
+
BOOLEAN symmetry_periodic_x "Ensure a periodicity symmetry in the x
direction"
{
} no
diff --git a/Carpet/CarpetRegrid2/src/paramcheck.cc
b/Carpet/CarpetRegrid2/src/paramcheck.cc
index 5cc8978..679a562 100644
--- a/Carpet/CarpetRegrid2/src/paramcheck.cc
+++ b/Carpet/CarpetRegrid2/src/paramcheck.cc
@@ -25,7 +25,7 @@ namespace CarpetRegrid2 {
DECLARE_CCTK_ARGUMENTS;
DECLARE_CCTK_PARAMETERS;
- enum sym_t { sym_unknown, sym_90, sym_180 };
+ enum sym_t { sym_unknown, sym_90, sym_180, sym_parity };
int num_params = 0;
sym_t params = sym_unknown;
@@ -40,7 +40,13 @@ namespace CarpetRegrid2 {
params = sym_180;
param = "symmetry_rotating180";
}
-
+
+ if (symmetry_parity) {
+ ++num_params;
+ params = sym_parity;
+ param = "symmetry_parity";
+ }
+
int num_thorns = 0;
sym_t thorns = sym_unknown;
char const* thorn = "";
@@ -59,13 +65,18 @@ namespace CarpetRegrid2 {
thorns = sym_180;
thorn = "RotatingSymmetry180";
}
-
+ if (CCTK_IsThornActive ("ParitySymmetry")) {
+ ++num_thorns;
+ thorns = sym_parity;
+ thorn = "ParitySymmetry";
+ }
+
if (num_params > 1) {
- CCTK_PARAMWARN ("Too many of the symmetry parameters
symmetry_rotating90 and symmetry_rotating180 are specified. (At most
one of these can be specified.)");
+ CCTK_PARAMWARN ("Too many of the symmetry parameters at least two
of symmetry_rotating90, symmetry_rotating180, and parity_symmetry are
specified. (At most one of these can be specified.)");
}
if (num_thorns > 1) {
- CCTK_PARAMWARN ("Too many of the symmetry thorns
RotatingSymmetry90, RotatingSymmetry90r, and RotatingSymmetry180 are
active. (At most one of these can be active.)");
+ CCTK_PARAMWARN ("Too many of the symmetry thorns
RotatingSymmetry90, RotatingSymmetry90r, RotatingSymmetry180, and
ParitySymmetry are active. (At most one of these can be active.)");
}
if (params != sym_unknown and thorns != sym_unknown and params !=
thorns) {
diff --git a/Carpet/CarpetRegrid2/src/property.cc
b/Carpet/CarpetRegrid2/src/property.cc
index a568e82..c2a31a1 100644
--- a/Carpet/CarpetRegrid2/src/property.cc
+++ b/Carpet/CarpetRegrid2/src/property.cc
@@ -577,7 +577,121 @@ namespace CarpetRegrid2 {
}
}
+
//////////////////////////////////////////////////////////////////////////////
+ // Make the boxes parity symmetric
+
//////////////////////////////////////////////////////////////////////////////
+
+ ibset parsym::
+ symmetrised_regions (gh const& hh, dh const& dd,
+ level_boundary const& bnd,
+ vector<ibset> const& regions, int const rl)
+ {
+ ibbox const& baseextent = hh.baseextent(0,rl);
+
+ ibset symmetrised = regions.at(rl);
+ for (ibset::const_iterator
+ ibb = regions.at(rl).begin(); ibb != regions.at(rl).end();
++ ibb)
+ {
+ ibbox const& bb = *ibb;
+
+ bvect const lower_is_outside_lower =
+ bb.lower() - bnd.min_bnd_dist_away[0] * bb.stride() <=
+ bnd.level_physical_ilower;
+
+ // Treat z direction
+ int const dir = 2;
+ if (lower_is_outside_lower[dir]) {
+ ivect const ilo = bb.lower();
+ ivect const iup = bb.upper();
+ ivect const istr = bb.stride();
+ assert (istr[0] == istr[1]);
+
+ // Origin
+ assert (hh.refcent == vertex_centered or all (istr % 2 == 0));
+ rvect const axis ( (bnd.physical_lower[0] +
bnd.physical_upper[0]) / 2,
+ (bnd.physical_lower[1] +
bnd.physical_upper[1]) / 2,
+ bnd.physical_lower[2]);
+ ivect const iaxis0 = rpos2ipos (axis, bnd.origin, bnd.scale,
hh, rl);
+ assert (all ((iaxis0 - baseextent.lower()) % istr == 0));
+ ivect const iaxis1 = rpos2ipos1 (axis, bnd.origin, bnd.scale,
hh, rl);
+ assert (all ((iaxis1 - baseextent.lower()) % istr == 0));
+ ivect const offset = iaxis1 - iaxis0;
+ assert (all (offset % istr == 0));
+ if (hh.refcent == vertex_centered) {
+ assert (all (offset >= 0 and offset < 2*istr));
+ assert (all ((iaxis0 + iaxis1 - offset) % (2*istr) == 0));
+ } else {
+ // The offset may be negative because both boundaries are
+ // shifted inwards by 1/2 grid spacing, and therefore iaxis0
+ // < iaxis1 + istr
+ assert (all (offset >= -istr and offset < istr));
+ assert (all ((iaxis0 + iaxis1 - offset) % (2*istr) == istr));
+ assert (all (istr % 2 == 0));
+ }
+ ivect const iaxis = (iaxis0 + iaxis1 - offset) / 2;
+ ivect const neg_ilo = (2*iaxis+offset) - ilo;
+ ivect const neg_iup = (2*iaxis+offset) - iup;
+
+ // Rotate 180 degrees about z axis
+ ivect const new_ilo (neg_iup[0], neg_iup[1], neg_iup[2]);
+ ivect const new_iup (neg_ilo[0], neg_ilo[1], neg_ilo[2]);
+ ivect const new_istr (istr);
+
+ ibbox const new_bb (new_ilo, new_iup, new_istr);
+ // Will be clipped later
+ // assert (new_bb.is_contained_in (baseextent));
+
+ // symmetrised |= new_bb & baseextent;
+ symmetrised |= new_bb;
+ }
+ }
+
+ return symmetrised;
+ }
+
+ bool parsym::
+ test_impl (gh const& hh, dh const& dd,
+ level_boundary const& bnd,
+ vector<ibset> const& regions, int const rl)
+ {
+ DECLARE_CCTK_PARAMETERS;
+
+ if (not symmetry_parity) return true;
+
+ ibset const symmetrised = symmetrised_regions (hh, dd, bnd,
regions, rl);
+
+ // We cannot test for equality, since the difference may be
+ // outside of the domain (and hence irrelevant)
+ // return regions.AT(rl) == symmetrised;
+
+ // Test whether any part of the difference (i.e. that part of the
+ // level that would be added by symmetrising) is inside the
+ // domain. If the difference is outside, we can safely ignore it.
+ ibbox const& baseextent = hh.baseextent(0,rl);
+ ibset const difference = symmetrised - regions.AT(rl);
+ return (difference & baseextent).empty();
+ }
+ void parsym::
+ enforce_impl (gh const& hh, dh const& dd,
+ level_boundary const& bnd,
+ vector<ibset>& regions, int const rl)
+ {
+ DECLARE_CCTK_PARAMETERS;
+
+ assert (symmetry_parity);
+
+ if (veryverbose) {
+ cout << "Refinement level " << rl << ": making regions parity
symmetric...\n";
+ }
+
+ regions.AT(rl) = symmetrised_regions (hh, dd, bnd, regions, rl);
+
+ if (veryverbose) {
+ cout << " New regions are " << regions.at(rl) << "\n";
+ }
+ }
+
//////////////////////////////////////////////////////////////////////////////
// Make the boxes periodic in one direction
diff --git a/Carpet/CarpetRegrid2/src/property.hh
b/Carpet/CarpetRegrid2/src/property.hh
index d5540d6..b0080c7 100644
--- a/Carpet/CarpetRegrid2/src/property.hh
+++ b/Carpet/CarpetRegrid2/src/property.hh
@@ -112,6 +112,18 @@ namespace CarpetRegrid2 {
vector<ibset>& regions, int rl);
};
+ // Make the boxes parity symmetric
+ class parsym: public property {
+ ibset symmetrised_regions (gh const& hh, dh const& dd,
+ level_boundary const& bnd,
+ vector<ibset> const& regions, int rl);
+ bool test_impl (gh const& hh, dh const& dd,
+ level_boundary const& bnd,
+ vector<ibset> const& regions, int rl);
+ void enforce_impl (gh const& hh, dh const& dd,
+ level_boundary const& bnd,
+ vector<ibset>& regions, int rl);
+ };
// Make the boxes rotating-180 symmetric
diff --git a/Carpet/CarpetRegrid2/src/regrid.cc
b/Carpet/CarpetRegrid2/src/regrid.cc
index 427d8b0..5b32a32 100644
--- a/Carpet/CarpetRegrid2/src/regrid.cc
+++ b/Carpet/CarpetRegrid2/src/regrid.cc
@@ -329,6 +329,7 @@ namespace CarpetRegrid2 {
properties.push_back (new snap_coarse());
properties.push_back (new rotsym90());
properties.push_back (new rotsym180());
+ properties.push_back (new parsym());
properties.push_back (new periodic<0>());
properties.push_back (new periodic<1>());
properties.push_back (new periodic<2>());
More information about the Users
mailing list