[Users] Issues with Carpet run stalling on RIT cluster

Yosef Zlochower yosef at astro.rit.edu
Fri May 31 06:55:54 CDT 2013


Hi,
   I was wondering whether anyone else has seen this issue, or whether
it is specific to the particular cluster I am using.
The problem is that the runs stall after a few hours. Basically, two
processes (out of 10-12) get killed due to a failed assertion within the
MPI library (I'm using Open MPI on a QLogic InfiniPath IB network). From
the backtrace, it looks like the recompose step is the one initiating
the MPI call. Despite the name of the executable (cactus_lazevnehalem),
this was actually run on a Sandy Bridge processor.
The OS is CentOS 6, and I'm using icc version 13.1.1. The ET version is
Orsted.

I have seen this issue with many different runs. However, the backtrace
and assertion failure below came from runs that used the
"ParitySymmetry" thorn and the associated changes to CarpetRegrid2. I
have included the patch to CarpetRegrid2 at the bottom.

n015:3.0.Assertion failure at ptl.c:200: nbytes == msglen
n018:3.0.Assertion failure at ptl.c:200: nbytes == msglen

Backtrace from rank 2 pid 27135:
1. /lib64/libc.so.6() [0x32a4832920]
2. /lib64/libc.so.6(gsignal+0x35) [0x32a48328a5]
3. /lib64/libc.so.6(abort+0x175) [0x32a4834085]
4. /usr/lib64/libpsm_infinipath.so.1(+0x17b6d) [0x7f577e6c6b6d]
5. /usr/lib64/libpsm_infinipath.so.1(psmi_handle_error+0x261) 
[0x7f577e6c6dd1]
6. /usr/lib64/libpsm_infinipath.so.1(psmi_am_mq_handler_rtsmatch+0x17a) 
[0x7f577e6c1f6a]
7. /usr/lib64/libpsm_infinipath.so.1(+0xa832) [0x7f577e6b9832]
8. /usr/lib64/libpsm_infinipath.so.1(+0xd90f) [0x7f577e6bc90f]
9. /usr/lib64/libpsm_infinipath.so.1(psmi_poll_internal+0x29) 
[0x7f577e6df309]
a. /usr/lib64/libpsm_infinipath.so.1(psm_mq_ipeek+0xa5) [0x7f577e6dde05]
b. /usr/mpi/gcc/openmpi-1.4.3-qlc/lib64/openmpi/mca_mtl_psm.so(+0x15f4) 
[0x7f577e9025f4]
c. 
/usr/mpi/gcc/openmpi-1.4.3-qlc/lib64/libopen-pal.so.0(opal_progress+0x5a) [0x7f57815f30fa]
d. /usr/mpi/gcc/openmpi-1.4.3-qlc/lib64/libmpi.so.0(+0x35685) 
[0x7f5782f8a685]
e. /usr/mpi/gcc/openmpi-1.4.3-qlc/lib64/libmpi.so.0(PMPI_Waitall+0xa3) 
[0x7f5782fb6c73]
f. comm_state::step() 
[./cactus_lazevnehalem(_ZN10comm_state4stepEv+0x4a2) [0x8826c2]]
10. dh::recompose(int, bool) 
[./cactus_lazevnehalem(_ZN2dh9recomposeEib+0x218) [0x89d8a8]]
11. gh::recompose(int, bool) 
[./cactus_lazevnehalem(_ZN2gh9recomposeEib+0x52) [0x8dc722]]
12. Carpet::Recompose(_cGH const*, int, bool) 
[./cactus_lazevnehalem(_ZN6Carpet9RecomposeEPK4_cGHib+0xea) [0x7db6ea]]

diff --git a/Carpet/CarpetRegrid2/param.ccl b/Carpet/CarpetRegrid2/param.ccl
index c7327d2..4843abe 100644
--- a/Carpet/CarpetRegrid2/param.ccl
+++ b/Carpet/CarpetRegrid2/param.ccl
@@ -62,6 +62,11 @@ BOOLEAN symmetry_rotating180 "Ensure a 180 degree 
rotating symmetry about the z
  {
  } no

+BOOLEAN symmetry_parity "parity "
+{
+} no
+
+
  BOOLEAN symmetry_periodic_x "Ensure a periodicity symmetry in the x 
direction"
  {
  } no
diff --git a/Carpet/CarpetRegrid2/src/paramcheck.cc 
b/Carpet/CarpetRegrid2/src/paramcheck.cc
index 5cc8978..679a562 100644
--- a/Carpet/CarpetRegrid2/src/paramcheck.cc
+++ b/Carpet/CarpetRegrid2/src/paramcheck.cc
@@ -25,7 +25,7 @@ namespace CarpetRegrid2 {
      DECLARE_CCTK_ARGUMENTS;
      DECLARE_CCTK_PARAMETERS;

-    enum sym_t { sym_unknown, sym_90, sym_180 };
+    enum sym_t { sym_unknown, sym_90, sym_180, sym_parity };

      int num_params = 0;
      sym_t params = sym_unknown;
@@ -40,7 +40,13 @@ namespace CarpetRegrid2 {
        params = sym_180;
        param = "symmetry_rotating180";
      }
-
+
+    if (symmetry_parity) {
+      ++num_params;
+      params = sym_parity;
+      param = "symmetry_parity";
+    }
+
      int num_thorns = 0;
      sym_t thorns = sym_unknown;
      char const* thorn = "";
@@ -59,13 +65,18 @@ namespace CarpetRegrid2 {
        thorns = sym_180;
        thorn = "RotatingSymmetry180";
      }
-
+    if (CCTK_IsThornActive ("ParitySymmetry")) {
+      ++num_thorns;
+      thorns = sym_parity;
+      thorn = "ParitySymmetry";
+    }
+
      if (num_params > 1) {
-      CCTK_PARAMWARN ("Too many of the symmetry parameters 
symmetry_rotating90 and symmetry_rotating180 are specified.  (At most 
one of these can be specified.)");
+      CCTK_PARAMWARN ("Too many of the symmetry parameters at least two 
of symmetry_rotating90, symmetry_rotating180, and parity_symmetry are 
specified.  (At most one of these can be specified.)");
      }

      if (num_thorns > 1) {
-      CCTK_PARAMWARN ("Too many of the symmetry thorns 
RotatingSymmetry90, RotatingSymmetry90r, and RotatingSymmetry180 are 
active.  (At most one of these can be active.)");
+      CCTK_PARAMWARN ("Too many of the symmetry thorns 
RotatingSymmetry90, RotatingSymmetry90r, RotatingSymmetry180, and 
ParitySymmetry are active.  (At most one of these can be active.)");
      }

      if (params != sym_unknown and thorns != sym_unknown and params != 
thorns) {
diff --git a/Carpet/CarpetRegrid2/src/property.cc 
b/Carpet/CarpetRegrid2/src/property.cc
index a568e82..c2a31a1 100644
--- a/Carpet/CarpetRegrid2/src/property.cc
+++ b/Carpet/CarpetRegrid2/src/property.cc
@@ -577,7 +577,121 @@ namespace CarpetRegrid2 {
      }
    }

+ 
//////////////////////////////////////////////////////////////////////////////
+  // Make the boxes  parity symmetric
+ 
//////////////////////////////////////////////////////////////////////////////
+
+  ibset parsym::
+  symmetrised_regions (gh const& hh, dh const& dd,
+                       level_boundary const& bnd,
+                       vector<ibset> const& regions, int const rl)
+  {
+    ibbox const& baseextent = hh.baseextent(0,rl);
+
+    ibset symmetrised = regions.at(rl);
+    for (ibset::const_iterator
+           ibb = regions.at(rl).begin(); ibb != regions.at(rl).end(); 
++ ibb)
+    {
+      ibbox const& bb = *ibb;
+
+      bvect const lower_is_outside_lower =
+        bb.lower() - bnd.min_bnd_dist_away[0] * bb.stride() <=
+        bnd.level_physical_ilower;
+
+      // Treat z direction
+      int const dir = 2;
+      if (lower_is_outside_lower[dir]) {
+        ivect const ilo = bb.lower();
+        ivect const iup = bb.upper();
+        ivect const istr = bb.stride();
+        assert (istr[0] == istr[1]);
+
+        // Origin
+        assert (hh.refcent == vertex_centered or all (istr % 2 == 0));
+        rvect const axis ( (bnd.physical_lower[0] + 
bnd.physical_upper[0]) / 2,
+                          (bnd.physical_lower[1] + 
bnd.physical_upper[1]) / 2,
+                          bnd.physical_lower[2]);
+        ivect const iaxis0 = rpos2ipos (axis, bnd.origin, bnd.scale, 
hh, rl);
+        assert (all ((iaxis0 - baseextent.lower()) % istr == 0));
+        ivect const iaxis1 = rpos2ipos1 (axis, bnd.origin, bnd.scale, 
hh, rl);
+        assert (all ((iaxis1 - baseextent.lower()) % istr == 0));
+        ivect const offset = iaxis1 - iaxis0;
+        assert (all (offset % istr == 0));
+        if (hh.refcent == vertex_centered) {
+          assert (all (offset >= 0 and offset < 2*istr));
+          assert (all ((iaxis0 + iaxis1 - offset) % (2*istr) == 0));
+        } else {
+          // The offset may be negative because both boundaries are
+          // shifted inwards by 1/2 grid spacing, and therefore iaxis0
+          // < iaxis1 + istr
+          assert (all (offset >= -istr and offset < istr));
+          assert (all ((iaxis0 + iaxis1 - offset) % (2*istr) == istr));
+          assert (all (istr % 2 == 0));
+        }
+        ivect const iaxis = (iaxis0 + iaxis1 - offset) / 2;
+        ivect const neg_ilo = (2*iaxis+offset) - ilo;
+        ivect const neg_iup = (2*iaxis+offset) - iup;
+
+        // Rotate 180 degrees about z axis
+        ivect const new_ilo (neg_iup[0], neg_iup[1], neg_iup[2]);
+        ivect const new_iup (neg_ilo[0], neg_ilo[1], neg_ilo[2]);
+        ivect const new_istr (istr);
+
+        ibbox const new_bb (new_ilo, new_iup, new_istr);
+        // Will be clipped later
+        // assert (new_bb.is_contained_in (baseextent));
+
+        // symmetrised |= new_bb & baseextent;
+        symmetrised |= new_bb;
+      }
+    }
+
+    return symmetrised;
+  }
+
+  bool parsym::
+  test_impl (gh const& hh, dh const& dd,
+             level_boundary const& bnd,
+             vector<ibset> const& regions, int const rl)
+  {
+    DECLARE_CCTK_PARAMETERS;
+
+    if (not symmetry_parity) return true;
+
+    ibset const symmetrised = symmetrised_regions (hh, dd, bnd, 
regions, rl);
+
+    // We cannot test for equality, since the difference may be
+    // outside of the domain (and hence irrelevant)
+    // return regions.AT(rl) == symmetrised;
+
+    // Test whether any part of the difference (i.e. that part of the
+    // level that would be added by symmetrising) is inside the
+    // domain. If the difference is outside, we can safely ignore it.
+    ibbox const& baseextent = hh.baseextent(0,rl);
+    ibset const difference = symmetrised - regions.AT(rl);
+    return (difference & baseextent).empty();
+  }

+  void parsym::
+  enforce_impl (gh const& hh, dh const& dd,
+                level_boundary const& bnd,
+                vector<ibset>& regions, int const rl)
+  {
+    DECLARE_CCTK_PARAMETERS;
+
+    assert (symmetry_parity);
+
+    if (veryverbose) {
+      cout << "Refinement level " << rl << ": making regions parity 
symmetric...\n";
+    }
+
+    regions.AT(rl) = symmetrised_regions (hh, dd, bnd, regions, rl);
+
+    if (veryverbose) {
+      cout << "   New regions are " << regions.at(rl) << "\n";
+    }
+  }
+

//////////////////////////////////////////////////////////////////////////////
    // Make the boxes periodic in one direction
diff --git a/Carpet/CarpetRegrid2/src/property.hh 
b/Carpet/CarpetRegrid2/src/property.hh
index d5540d6..b0080c7 100644
--- a/Carpet/CarpetRegrid2/src/property.hh
+++ b/Carpet/CarpetRegrid2/src/property.hh
@@ -112,6 +112,18 @@ namespace CarpetRegrid2 {
                         vector<ibset>& regions, int rl);
    };

+  // Make the boxes parity symmetric
+  class parsym: public property {
+    ibset symmetrised_regions (gh const& hh, dh const& dd,
+                               level_boundary const& bnd,
+                               vector<ibset> const& regions, int rl);
+    bool test_impl (gh const& hh, dh const& dd,
+                    level_boundary const& bnd,
+                    vector<ibset> const& regions, int rl);
+    void enforce_impl (gh const& hh, dh const& dd,
+                       level_boundary const& bnd,
+                       vector<ibset>& regions, int rl);
+  };


    // Make the boxes rotating-180 symmetric
diff --git a/Carpet/CarpetRegrid2/src/regrid.cc 
b/Carpet/CarpetRegrid2/src/regrid.cc
index 427d8b0..5b32a32 100644
--- a/Carpet/CarpetRegrid2/src/regrid.cc
+++ b/Carpet/CarpetRegrid2/src/regrid.cc
@@ -329,6 +329,7 @@ namespace CarpetRegrid2 {
      properties.push_back (new snap_coarse());
      properties.push_back (new rotsym90());
      properties.push_back (new rotsym180());
+    properties.push_back (new parsym());
      properties.push_back (new periodic<0>());
      properties.push_back (new periodic<1>());
      properties.push_back (new periodic<2>());



More information about the Users mailing list