diff --git a/src/mpi/init/init_util.c b/src/mpi/init/init_util.c
index 3f86e0fd86b..436bb87b722 100644
--- a/src/mpi/init/init_util.c
+++ b/src/mpi/init/init_util.c
@@ -5,6 +5,7 @@
 
 #include "mpiimpl.h"
 #include "mpi_init.h"
+#include "mpichinfo.h"
 
 /* The following routines provide a callback facility for modules that need
    some code called on exit.  This method allows us to avoid forcing
@@ -98,6 +99,7 @@ static void print_setting(const char *label, const char *value)
 
 void MPII_dump_debug_summary(void)
 {
+    printf("MPICH %s - %s\n", MPICH_VERSION, MPICH_VERSION_DATE);
 #ifdef HAVE_ERROR_CHECKING
     print_setting("error checking", "enabled");
 #else
diff --git a/src/mpid/ch4/shm/posix/posix_init.c b/src/mpid/ch4/shm/posix/posix_init.c
index a991212b4f5..7d7f577deae 100644
--- a/src/mpid/ch4/shm/posix/posix_init.c
+++ b/src/mpid/ch4/shm/posix/posix_init.c
@@ -483,3 +483,33 @@ int MPIDI_POSIX_mpi_free_mem(void *ptr)
 {
     return MPIDIG_mpi_free_mem(ptr);
 }
+
+/* Throttle id held by this process; 0 means no id is currently held.
+ * NOTE(review): plain (non-atomic) static -- confirm callers are serialized. */
+static int progress_throttle_id;
+
+/* Enter the throttle (no-progress) state.  If the shm slab exists and no id is
+ * held yet, atomically add 1 to the shared counter and cache the fetched value
+ * plus one as this process' throttle id.  Returns the cached id, which stays 0
+ * when the shm slab is not available. */
+int MPIDI_POSIX_progress_throttle_start(void)
+{
+    if (MPIDI_POSIX_global.shm_slab && progress_throttle_id == 0) {
+        MPL_atomic_int_t *counter_ptr =
+            &((MPIDI_POSIX_shm_t *) MPIDI_POSIX_global.shm_slab)->progress_throttle_counter;
+        progress_throttle_id = MPL_atomic_fetch_add_int(counter_ptr, 1) + 1;
+    }
+    return progress_throttle_id;
+}
+
+/* Leave the throttle state: if an id is held, atomically subtract 1 from the
+ * shared counter and reset the cached id to 0.  Otherwise this is a no-op. */
+void MPIDI_POSIX_progress_throttle_stop(void)
+{
+    if (MPIDI_POSIX_global.shm_slab && progress_throttle_id > 0) {
+        MPL_atomic_int_t *counter_ptr =
+            &((MPIDI_POSIX_shm_t *) MPIDI_POSIX_global.shm_slab)->progress_throttle_counter;
+        MPL_atomic_fetch_sub_int(counter_ptr, 1);
+        progress_throttle_id = 0;
+    }
+}
diff --git a/src/mpid/ch4/shm/posix/posix_pre.h b/src/mpid/ch4/shm/posix/posix_pre.h
index 293bf2d5cca..bb33eab98c4 100644
--- a/src/mpid/ch4/shm/posix/posix_pre.h
+++ b/src/mpid/ch4/shm/posix/posix_pre.h
@@ -183,4 +183,11 @@ typedef struct {
                              "**windows_mutex %s", "MPL_proc_mutex_unlock");    \
     } while (0)
 
+int MPIDI_POSIX_progress_throttle_start(void);
+void MPIDI_POSIX_progress_throttle_stop(void);
+#ifndef MPIDI_progress_throttle_start
+#define MPIDI_progress_throttle_start MPIDI_POSIX_progress_throttle_start
+#define MPIDI_progress_throttle_stop MPIDI_POSIX_progress_throttle_stop
+#endif
+
 #endif /* POSIX_PRE_H_INCLUDED */
diff --git a/src/mpid/ch4/shm/posix/posix_types.h b/src/mpid/ch4/shm/posix/posix_types.h
index f8b0724fdc7..91332da7670 100644
--- a/src/mpid/ch4/shm/posix/posix_types.h
+++ b/src/mpid/ch4/shm/posix/posix_types.h
@@ -50,6 +50,10 @@ typedef struct {
     MPL_atomic_int_t num_shared_vci; /* number of processes currently using shm_vci_slab */
     MPL_atomic_int_t shm_ready; /* root (1st proc that allocates shm_slab) set it to MPIDI_POSIX_READY_FLAG */
     MPL_atomic_uint64_t shm_limit_counter; /* release_gather use this to track total amount of shared memory allocated */
+    MPL_atomic_int_t progress_throttle_counter; /* When a process enters the no-progress state, it increments the counter
+                                                 * and is given a throttle id. This allows some processes to skip the
+                                                 * throttle based on their throttle id.
+                                                 */
     MPL_atomic_int_t eager_ready[]; /* size of local_size. Each process update its flag to MPIDI_POSIX_READY_FLAG */
 } MPIDI_POSIX_shm_t;
 
diff --git a/src/mpid/ch4/src/ch4_progress.h b/src/mpid/ch4/src/ch4_progress.h
index 67a244a23b0..bb5c46c1650 100644
--- a/src/mpid/ch4/src/ch4_progress.h
+++ b/src/mpid/ch4/src/ch4_progress.h
@@ -8,6 +8,8 @@
 
 #include "ch4_impl.h"
 #include "stream_workq.h"
+/* FIXME: configure check sched_yield */
+#include <sched.h>
 
 /*
 === BEGIN_MPI_T_CVAR_INFO_BLOCK ===
@@ -47,6 +49,29 @@
         When MPIR_CVAR_CH4_PROGRESS_THROTTLE=true, MPIR_CVAR_CH4_PROGRESS_THROTTLE_NO_PROGRESS_COUNT is the number of
         consecutive polls that must fail to make progress before calling usleep(1) in the progress. A higher value
         makes the usleep less frequent, and a lower value makes the usleep more frequent.
+
+    - name        : MPIR_CVAR_CH4_PROGRESS_THROTTLE_MIN_PROCS
+      category    : CH4
+      type        : int
+      default     : 4
+      class       : none
+      verbosity   : MPI_T_VERBOSITY_USER_BASIC
+      scope       : MPI_T_SCOPE_LOCAL
+      description : >-
+        When MPIR_CVAR_CH4_PROGRESS_THROTTLE=true, a process is throttled only if its throttle id is greater than
+        MPIR_CVAR_CH4_PROGRESS_THROTTLE_MIN_PROCS. A process obtains a throttle id when it enters the throttle state (no_progress_counter exceeds MPIR_CVAR_CH4_PROGRESS_THROTTLE_NO_PROGRESS_COUNT).
+
+    - name        : MPIR_CVAR_CH4_PROGRESS_THROTTLE_NUM_PAUSES
+      category    : CH4
+      type        : int
+      default     : 1
+      class       : none
+      verbosity   : MPI_T_VERBOSITY_USER_BASIC
+      scope       : MPI_T_SCOPE_LOCAL
+      description : >-
+        Set the number of sched_yield() calls issued for each progress throttle (default 1). If set to 0,
+        usleep(1) is used for the throttle instead.
+
 === END_MPI_T_CVAR_INFO_BLOCK ===
 */
 
@@ -163,9 +188,6 @@ MPL_STATIC_INLINE_PREFIX int MPIDI_progress_test(MPID_Progress_state * state)
         for (int vci = 0; vci < MPIDI_global.n_vcis; vci++) {
             MPIDI_PROGRESS(vci, true);
         }
-        if (!made_progress && MPIR_CVAR_CH4_PROGRESS_THROTTLE) {
-            usleep(1);
-        }
     } else {
         for (int i = 0; i < state->vci_count; i++) {
             int vci = state->vci[i];
@@ -177,14 +199,29 @@ MPL_STATIC_INLINE_PREFIX int MPIDI_progress_test(MPID_Progress_state * state)
     }
 #endif
 
+/* Fallback when no module has defined the throttle hooks: start yields the
+ * constant id 1 (so throttling requires MIN_PROCS < 1) and stop is a no-op. */
+#ifndef MPIDI_progress_throttle_start
+#define MPIDI_progress_throttle_start() 1
+#define MPIDI_progress_throttle_stop() do { } while (0)
+#endif
   fn_exit:
     MPIR_FUNC_EXIT;
     if (MPIR_CVAR_CH4_PROGRESS_THROTTLE) {
         if (made_progress) {
             no_progress_counter = 0;
+            MPIDI_progress_throttle_stop();
         } else if (no_progress_counter > MPIR_CVAR_CH4_PROGRESS_THROTTLE_NO_PROGRESS_COUNT) {
-            no_progress_counter = 0;
-            usleep(1);
+            int throttle_id = MPIDI_progress_throttle_start();
+            if (throttle_id > MPIR_CVAR_CH4_PROGRESS_THROTTLE_MIN_PROCS) {
+                if (MPIR_CVAR_CH4_PROGRESS_THROTTLE_NUM_PAUSES == 0) {
+                    usleep(1);
+                } else {
+                    for (int i = 0; i < MPIR_CVAR_CH4_PROGRESS_THROTTLE_NUM_PAUSES; i++) {
+                        sched_yield();
+                    }
+                }
+            }
         } else {
             no_progress_counter++;
         }