From 63d5cbe4d08282f03e151ee18b910e0d6a08d8cb Mon Sep 17 00:00:00 2001 From: "Jan Ciesko (-EXP)" Date: Thu, 19 Dec 2024 12:41:24 -0700 Subject: [PATCH 1/3] Modernize miniWeather --- c/CMakeLists.txt | 159 --- c/miniWeather_mpi.cpp | 907 ----------------- c/miniWeather_serial.cpp | 873 ----------------- common/apply-clang-format | 41 + cpp/CMakeLists.txt | 180 ++-- cpp/YAKL | 1 - cpp/cmake/modules/FindPnetCDF.cmake | 12 + {c => cpp/cmake}/utils.cmake | 0 cpp/miniWeather_mpi.cpp | 922 +++++++++--------- {c => cpp}/miniWeather_mpi_openacc.cpp | 1 + {c => cpp}/miniWeather_mpi_openmp.cpp | 1 + {c => cpp}/miniWeather_mpi_openmp45.cpp | 1 + cpp/miniWeather_serial.cpp | 850 ++++++++-------- {c/build => cpp/scripts}/check_output.sh | 0 .../scripts}/cmake_andes_gnu_cpu.sh | 0 .../scripts}/cmake_andes_intel_cpu.sh | 0 .../scripts}/cmake_andes_pgi_cpu.sh | 0 {c/build => cpp/scripts}/cmake_clean.sh | 0 {c/build => cpp/scripts}/cmake_crusher_amd.sh | 0 .../scripts}/cmake_crusher_cray.sh | 0 {c/build => cpp/scripts}/cmake_crusher_gnu.sh | 0 .../scripts}/cmake_fhqwhgads_gnu.sh | 0 .../scripts}/cmake_fhqwhgads_nvhpc.sh | 0 .../scripts}/cmake_perlmutter_gnu_cpu.sh | 0 {c/build => cpp/scripts}/cmake_summit_gnu.sh | 0 {c/build => cpp/scripts}/cmake_summit_ibm.sh | 0 .../scripts}/cmake_summit_nvhpc.sh | 0 .../scripts}/cmake_thatchroof_gnu.sh | 0 .../scripts}/cmake_thatchroof_nvhpc.sh | 0 cpp_yakl/CMakeLists.txt | 109 +++ cpp_yakl/cmake/modules/FindPnetCDF.cmake | 12 + cpp_yakl/cmake/modules/FindYAKL.cmake | 11 + cpp_yakl/cmake/utils.cmake | 48 + {cpp => cpp_yakl}/const.h | 0 .../miniWeather_mpi_parallelfor_simd_x.cpp | 0 cpp_yakl/miniWeather_mpi.cpp | 912 +++++++++++++++++ .../miniWeather_mpi_parallelfor.cpp | 79 +- cpp_yakl/miniWeather_serial.cpp | 850 ++++++++++++++++ .../scripts}/check_output.sh | 0 .../scripts}/cmake_andes_clang_cpu.sh | 0 .../scripts}/cmake_andes_gnu_cpu.sh | 0 .../scripts}/cmake_andes_intel_cpu.sh | 0 .../scripts}/cmake_ascent_gnu.sh | 0 .../build => cpp_yakl/scripts}/cmake_clean.sh | 0 .../scripts}/cmake_crusher_amd_cpu.sh | 0 .../scripts}/cmake_crusher_amd_gpu.sh | 0 .../scripts}/cmake_crusher_amd_openmp.sh | 0 .../scripts}/cmake_crusher_cray_cpu.sh | 0 .../scripts}/cmake_crusher_cray_openmp.sh | 0 .../scripts}/cmake_crusher_gnu_cpu.sh | 0 .../scripts}/cmake_crusher_gnu_openmp.sh | 0 .../scripts}/cmake_perlmutter_gnu_cpu.sh | 0 .../scripts}/cmake_perlmutter_gnu_gpu.sh | 0 .../scripts}/cmake_summit_gnu.sh | 0 .../scripts}/cmake_summit_ibm.sh | 0 .../scripts}/cmake_summit_pgi.sh | 0 .../scripts}/cmake_thatchroof_clang_cpu.sh | 0 .../scripts}/cmake_thatchroof_gnu_cpu.sh | 0 .../scripts}/cmake_thatchroof_gnu_gpu.sh | 0 .../scripts}/cmake_thatchroof_intel_cpu.sh | 0 .../scripts}/cmake_thatchroof_nvhpc_cpu.sh | 0 fortran/{build => scripts}/check_output.sh | 0 .../{build => scripts}/cmake_andes_gnu_cpu.sh | 0 .../cmake_andes_intel_cpu.sh | 0 .../{build => scripts}/cmake_andes_pgi_cpu.sh | 0 .../{build => scripts}/cmake_ascent_gnu.sh | 0 .../{build => scripts}/cmake_ascent_nvhpc.sh | 0 fortran/{build => scripts}/cmake_ascent_xl.sh | 0 fortran/{build => scripts}/cmake_clean.sh | 0 .../{build => scripts}/cmake_fhqwhgads_gnu.sh | 0 .../cmake_fhqwhgads_nvhpc.sh | 0 .../cmake_perlmutter_gnu_cpu.sh | 0 .../{build => scripts}/cmake_summit_gnu.sh | 0 .../{build => scripts}/cmake_summit_ibm.sh | 0 .../{build => scripts}/cmake_summit_nvhpc.sh | 0 .../cmake_thatchroof_gnu.sh | 0 .../cmake_thatchroof_intel.sh | 0 .../cmake_thatchroof_nvhpc.sh | 0 julia/{run/crusher => cpu}/Makefile | 13 +- julia/{run => 
scripts}/crusher/Manifest.toml | 0 julia/{run => scripts}/crusher/Project.toml | 0 81 files changed, 2994 insertions(+), 2988 deletions(-) delete mode 100644 c/CMakeLists.txt delete mode 100644 c/miniWeather_mpi.cpp delete mode 100644 c/miniWeather_serial.cpp create mode 100755 common/apply-clang-format delete mode 160000 cpp/YAKL create mode 100644 cpp/cmake/modules/FindPnetCDF.cmake rename {c => cpp/cmake}/utils.cmake (100%) rename {c => cpp}/miniWeather_mpi_openacc.cpp (99%) rename {c => cpp}/miniWeather_mpi_openmp.cpp (99%) rename {c => cpp}/miniWeather_mpi_openmp45.cpp (99%) rename {c/build => cpp/scripts}/check_output.sh (100%) rename {c/build => cpp/scripts}/cmake_andes_gnu_cpu.sh (100%) rename {c/build => cpp/scripts}/cmake_andes_intel_cpu.sh (100%) rename {c/build => cpp/scripts}/cmake_andes_pgi_cpu.sh (100%) rename {c/build => cpp/scripts}/cmake_clean.sh (100%) rename {c/build => cpp/scripts}/cmake_crusher_amd.sh (100%) rename {c/build => cpp/scripts}/cmake_crusher_cray.sh (100%) rename {c/build => cpp/scripts}/cmake_crusher_gnu.sh (100%) rename {c/build => cpp/scripts}/cmake_fhqwhgads_gnu.sh (100%) rename {c/build => cpp/scripts}/cmake_fhqwhgads_nvhpc.sh (100%) rename {c/build => cpp/scripts}/cmake_perlmutter_gnu_cpu.sh (100%) rename {c/build => cpp/scripts}/cmake_summit_gnu.sh (100%) rename {c/build => cpp/scripts}/cmake_summit_ibm.sh (100%) rename {c/build => cpp/scripts}/cmake_summit_nvhpc.sh (100%) rename {c/build => cpp/scripts}/cmake_thatchroof_gnu.sh (100%) rename {c/build => cpp/scripts}/cmake_thatchroof_nvhpc.sh (100%) create mode 100644 cpp_yakl/CMakeLists.txt create mode 100644 cpp_yakl/cmake/modules/FindPnetCDF.cmake create mode 100644 cpp_yakl/cmake/modules/FindYAKL.cmake create mode 100644 cpp_yakl/cmake/utils.cmake rename {cpp => cpp_yakl}/const.h (100%) rename {cpp => cpp_yakl}/experimental/miniWeather_mpi_parallelfor_simd_x.cpp (100%) create mode 100644 cpp_yakl/miniWeather_mpi.cpp rename {cpp => cpp_yakl}/miniWeather_mpi_parallelfor.cpp (92%) create mode 100644 cpp_yakl/miniWeather_serial.cpp rename {cpp/build => cpp_yakl/scripts}/check_output.sh (100%) rename {cpp/build => cpp_yakl/scripts}/cmake_andes_clang_cpu.sh (100%) rename {cpp/build => cpp_yakl/scripts}/cmake_andes_gnu_cpu.sh (100%) rename {cpp/build => cpp_yakl/scripts}/cmake_andes_intel_cpu.sh (100%) rename {cpp/build => cpp_yakl/scripts}/cmake_ascent_gnu.sh (100%) rename {cpp/build => cpp_yakl/scripts}/cmake_clean.sh (100%) rename {cpp/build => cpp_yakl/scripts}/cmake_crusher_amd_cpu.sh (100%) rename {cpp/build => cpp_yakl/scripts}/cmake_crusher_amd_gpu.sh (100%) rename {cpp/build => cpp_yakl/scripts}/cmake_crusher_amd_openmp.sh (100%) rename {cpp/build => cpp_yakl/scripts}/cmake_crusher_cray_cpu.sh (100%) rename {cpp/build => cpp_yakl/scripts}/cmake_crusher_cray_openmp.sh (100%) rename {cpp/build => cpp_yakl/scripts}/cmake_crusher_gnu_cpu.sh (100%) rename {cpp/build => cpp_yakl/scripts}/cmake_crusher_gnu_openmp.sh (100%) rename {cpp/build => cpp_yakl/scripts}/cmake_perlmutter_gnu_cpu.sh (100%) rename {cpp/build => cpp_yakl/scripts}/cmake_perlmutter_gnu_gpu.sh (100%) rename {cpp/build => cpp_yakl/scripts}/cmake_summit_gnu.sh (100%) rename {cpp/build => cpp_yakl/scripts}/cmake_summit_ibm.sh (100%) rename {cpp/build => cpp_yakl/scripts}/cmake_summit_pgi.sh (100%) rename {cpp/build => cpp_yakl/scripts}/cmake_thatchroof_clang_cpu.sh (100%) rename {cpp/build => cpp_yakl/scripts}/cmake_thatchroof_gnu_cpu.sh (100%) rename {cpp/build => cpp_yakl/scripts}/cmake_thatchroof_gnu_gpu.sh (100%) rename {cpp/build 
=> cpp_yakl/scripts}/cmake_thatchroof_intel_cpu.sh (100%) rename {cpp/build => cpp_yakl/scripts}/cmake_thatchroof_nvhpc_cpu.sh (100%) rename fortran/{build => scripts}/check_output.sh (100%) rename fortran/{build => scripts}/cmake_andes_gnu_cpu.sh (100%) rename fortran/{build => scripts}/cmake_andes_intel_cpu.sh (100%) rename fortran/{build => scripts}/cmake_andes_pgi_cpu.sh (100%) rename fortran/{build => scripts}/cmake_ascent_gnu.sh (100%) rename fortran/{build => scripts}/cmake_ascent_nvhpc.sh (100%) rename fortran/{build => scripts}/cmake_ascent_xl.sh (100%) rename fortran/{build => scripts}/cmake_clean.sh (100%) rename fortran/{build => scripts}/cmake_fhqwhgads_gnu.sh (100%) rename fortran/{build => scripts}/cmake_fhqwhgads_nvhpc.sh (100%) rename fortran/{build => scripts}/cmake_perlmutter_gnu_cpu.sh (100%) rename fortran/{build => scripts}/cmake_summit_gnu.sh (100%) rename fortran/{build => scripts}/cmake_summit_ibm.sh (100%) rename fortran/{build => scripts}/cmake_summit_nvhpc.sh (100%) rename fortran/{build => scripts}/cmake_thatchroof_gnu.sh (100%) rename fortran/{build => scripts}/cmake_thatchroof_intel.sh (100%) rename fortran/{build => scripts}/cmake_thatchroof_nvhpc.sh (100%) rename julia/{run/crusher => cpu}/Makefile (79%) rename julia/{run => scripts}/crusher/Manifest.toml (100%) rename julia/{run => scripts}/crusher/Project.toml (100%) diff --git a/c/CMakeLists.txt b/c/CMakeLists.txt deleted file mode 100644 index 3d469143..00000000 --- a/c/CMakeLists.txt +++ /dev/null @@ -1,159 +0,0 @@ -cmake_minimum_required(VERSION 3.0) -project(miniWeather CXX) - -enable_testing() - -include(utils.cmake) - - -############################################################ -## Set Parameters -############################################################ -if ("${NX}" STREQUAL "") - SET(NX 100) -endif() -if ("${NZ}" STREQUAL "") - SET(NZ 50) -endif() -if ("${SIM_TIME}" STREQUAL "") - SET(SIM_TIME 1000) -endif() -if ("${OUT_FREQ}" STREQUAL "") - SET(OUT_FREQ 10) -endif() -if ("${DATA_SPEC}" STREQUAL "") - SET(DATA_SPEC DATA_SPEC_THERMAL) -endif() -SET(EXE_DEFS "-D_NX=${NX} -D_NZ=${NZ} -D_SIM_TIME=${SIM_TIME} -D_OUT_FREQ=${OUT_FREQ} -D_DATA_SPEC=${DATA_SPEC}") -SET(TEST_DEFS "-D_NX=100 -D_NZ=50 -D_SIM_TIME=400 -D_OUT_FREQ=400 -D_DATA_SPEC=DATA_SPEC_THERMAL") - - -############################################################ -## Append CXXFLAGS -############################################################ -SET(CMAKE_CXX_FLAGS "${CXXFLAGS}") - - -############################################################ -## Compile the serial version -############################################################ -add_executable(serial miniWeather_serial.cpp) -set_target_properties(serial PROPERTIES COMPILE_FLAGS "${EXE_DEFS}") - -add_executable(serial_test miniWeather_serial.cpp) -set_target_properties(serial_test PROPERTIES COMPILE_FLAGS "${TEST_DEFS}") - -if (NOT ("${LDFLAGS}" STREQUAL "") ) - target_link_libraries(serial "${LDFLAGS}") - target_link_libraries(serial_test "${LDFLAGS}") -endif() -if (NOT ("${SERIAL_LINK_FLAGS}" STREQUAL "") ) - target_link_libraries(serial "${SERIAL_LINK_FLAGS}") - target_link_libraries(serial_test "${SERIAL_LINK_FLAGS}") -endif() - -add_test(NAME SERIAL_TEST COMMAND ./check_output.sh ./serial_test 1e-13 4.5e-5 ) - - -############################################################ -## Compile the MPI version -############################################################ -add_executable(mpi miniWeather_mpi.cpp) -set_target_properties(mpi PROPERTIES COMPILE_FLAGS "${EXE_DEFS}") - 
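The EXE_DEFS/TEST_DEFS strings above inject the grid size and run parameters into the sources as bare preprocessor macros (_NX, _NZ, _SIM_TIME, _OUT_FREQ, _DATA_SPEC). As a hedged sketch only (not part of this patch), the sources could guard those macros with the same defaults the build sets here, so they also compile when the definitions are not passed on the command line:

// Sketch: fall-back values mirroring the CMake defaults above (NX=100, NZ=50,
// SIM_TIME=1000, OUT_FREQ=10, DATA_SPEC_THERMAL). Illustrative only; the patch
// keeps these defaults in CMakeLists.txt rather than in the sources.
#ifndef _NX
#define _NX 100
#endif
#ifndef _NZ
#define _NZ 50
#endif
#ifndef _SIM_TIME
#define _SIM_TIME 1000
#endif
#ifndef _OUT_FREQ
#define _OUT_FREQ 10
#endif
#ifndef _DATA_SPEC
#define _DATA_SPEC DATA_SPEC_THERMAL
#endif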
-add_executable(mpi_test miniWeather_mpi.cpp) -set_target_properties(mpi_test PROPERTIES COMPILE_FLAGS "${TEST_DEFS}") - -if (NOT ("${LDFLAGS}" STREQUAL "") ) - target_link_libraries(mpi "${LDFLAGS}") - target_link_libraries(mpi_test "${LDFLAGS}") -endif() -if (NOT ("${MPI_LINK_FLAGS}" STREQUAL "") ) - target_link_libraries(mpi "${MPI_LINK_FLAGS}") - target_link_libraries(mpi_test "${MPI_LINK_FLAGS}") -endif() - -add_test(NAME MPI_TEST COMMAND ./check_output.sh ./mpi_test 1e-13 4.5e-5 ) - - -############################################################ -## Compile the MPI + OpenMP version -############################################################ -if (NOT ("${OPENMP_FLAGS}" STREQUAL "") ) - add_executable(openmp miniWeather_mpi_openmp.cpp) - set_target_properties(openmp PROPERTIES COMPILE_FLAGS "${EXE_DEFS} ${OPENMP_FLAGS}") - - add_executable(openmp_test miniWeather_mpi_openmp.cpp) - set_target_properties(openmp_test PROPERTIES COMPILE_FLAGS "${TEST_DEFS} ${OPENMP_FLAGS}") - - if (NOT ("${LDFLAGS}" STREQUAL "") ) - target_link_libraries(openmp "${LDFLAGS}") - target_link_libraries(openmp_test "${LDFLAGS}") - endif() - if ("${OPENMP_LINK_FLAGS}" STREQUAL "") - SET(OPENMP_LINK_FLAGS ${OPENMP_FLAGS}) - endif() - target_link_libraries(openmp "${OPENMP_LINK_FLAGS}") - target_link_libraries(openmp_test "${OPENMP_LINK_FLAGS}") - - add_test(NAME OPENMP_TEST COMMAND ./check_output.sh ./openmp_test 1e-13 4.5e-5 ) -endif() - - - -############################################################ -## Compile the MPI + OpenACC version -############################################################ -if (NOT ("${OPENACC_FLAGS}" STREQUAL "") ) - add_executable(openacc miniWeather_mpi_openacc.cpp) - set_target_properties(openacc PROPERTIES COMPILE_FLAGS "${EXE_DEFS} ${OPENACC_FLAGS}") - - add_executable(openacc_test miniWeather_mpi_openacc.cpp) - set_target_properties(openacc_test PROPERTIES COMPILE_FLAGS "${TEST_DEFS} ${OPENACC_FLAGS}") - - if (NOT ("${LDFLAGS}" STREQUAL "") ) - target_link_libraries(openacc "${LDFLAGS}") - target_link_libraries(openacc_test "${LDFLAGS}") - endif() - if ("${OPENACC_LINK_FLAGS}" STREQUAL "") - SET(OPENACC_LINK_FLAGS ${OPENACC_FLAGS}) - endif() - target_link_libraries(openacc "${OPENACC_LINK_FLAGS}") - target_link_libraries(openacc_test "${OPENACC_LINK_FLAGS}") - - add_test(NAME OPENACC_TEST COMMAND ./check_output.sh ./openacc_test 1e-13 4.5e-5 ) -endif() - - - -############################################################ -## Compile the MPI + OpenMP4.5 version -############################################################ -if (NOT ("${OPENMP45_FLAGS}" STREQUAL "") ) - add_executable(openmp45 miniWeather_mpi_openmp45.cpp) - set_target_properties(openmp45 PROPERTIES COMPILE_FLAGS "${EXE_DEFS} ${OPENMP45_FLAGS}") - - add_executable(openmp45_test miniWeather_mpi_openmp45.cpp) - set_target_properties(openmp45_test PROPERTIES COMPILE_FLAGS "${TEST_DEFS} ${OPENMP45_FLAGS}") - - if (NOT ("${LDFLAGS}" STREQUAL "") ) - target_link_libraries(openmp45 "${LDFLAGS}") - target_link_libraries(openmp45_test "${LDFLAGS}") - endif() - if ("${OPENMP45_LINK_FLAGS}" STREQUAL "") - SET(OPENMP45_LINK_FLAGS ${OPENMP45_FLAGS}) - endif() - target_link_libraries(openmp45 "${OPENMP45_LINK_FLAGS}") - target_link_libraries(openmp45_test "${OPENMP45_LINK_FLAGS}") - - # The XL compiler dumps out non-unique filenames that screw up parallel compilation - # So it must compile the test at a different time than the original executable - if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "XL") - add_dependencies(openmp45_test 
openmp45) - endif() - - add_test(NAME OPENMP45_TEST COMMAND ./check_output.sh ./openmp45_test 1e-13 4.5e-5 ) -endif() - - - diff --git a/c/miniWeather_mpi.cpp b/c/miniWeather_mpi.cpp deleted file mode 100644 index 26cf803d..00000000 --- a/c/miniWeather_mpi.cpp +++ /dev/null @@ -1,907 +0,0 @@ - -////////////////////////////////////////////////////////////////////////////////////////// -// miniWeather -// Author: Matt Norman , Oak Ridge National Laboratory -// This code simulates dry, stratified, compressible, non-hydrostatic fluid flows -// For documentation, please see the attached documentation in the "documentation" folder -// -////////////////////////////////////////////////////////////////////////////////////////// - -#include -#include -#include -#include -#include -#include -#include "pnetcdf.h" -#include - -constexpr double pi = 3.14159265358979323846264338327; //Pi -constexpr double grav = 9.8; //Gravitational acceleration (m / s^2) -constexpr double cp = 1004.; //Specific heat of dry air at constant pressure -constexpr double cv = 717.; //Specific heat of dry air at constant volume -constexpr double rd = 287.; //Dry air constant for equation of state (P=rho*rd*T) -constexpr double p0 = 1.e5; //Standard pressure at the surface in Pascals -constexpr double C0 = 27.5629410929725921310572974482; //Constant to translate potential temperature into pressure (P=C0*(rho*theta)**gamma) -constexpr double gamm = 1.40027894002789400278940027894; //gamma=cp/Rd , have to call this gamm because "gamma" is taken (I hate C so much) -//Define domain and stability-related constants -constexpr double xlen = 2.e4; //Length of the domain in the x-direction (meters) -constexpr double zlen = 1.e4; //Length of the domain in the z-direction (meters) -constexpr double hv_beta = 0.05; //How strong to diffuse the solution: hv_beta \in [0:1] -constexpr double cfl = 1.50; //"Courant, Friedrichs, Lewy" number (for numerical stability) -constexpr double max_speed = 450; //Assumed maximum wave speed during the simulation (speed of sound + speed of wind) (meter / sec) -constexpr int hs = 2; //"Halo" size: number of cells beyond the MPI tasks's domain needed for a full "stencil" of information for reconstruction -constexpr int sten_size = 4; //Size of the stencil used for interpolation - -//Parameters for indexing and flags -constexpr int NUM_VARS = 4; //Number of fluid state variables -constexpr int ID_DENS = 0; //index for density ("rho") -constexpr int ID_UMOM = 1; //index for momentum in the x-direction ("rho * u") -constexpr int ID_WMOM = 2; //index for momentum in the z-direction ("rho * w") -constexpr int ID_RHOT = 3; //index for density * potential temperature ("rho * theta") -constexpr int DIR_X = 1; //Integer constant to express that this operation is in the x-direction -constexpr int DIR_Z = 2; //Integer constant to express that this operation is in the z-direction -constexpr int DATA_SPEC_COLLISION = 1; -constexpr int DATA_SPEC_THERMAL = 2; -constexpr int DATA_SPEC_GRAVITY_WAVES = 3; -constexpr int DATA_SPEC_DENSITY_CURRENT = 5; -constexpr int DATA_SPEC_INJECTION = 6; - -constexpr int nqpoints = 3; -constexpr double qpoints [] = { 0.112701665379258311482073460022E0 , 0.500000000000000000000000000000E0 , 0.887298334620741688517926539980E0 }; -constexpr double qweights[] = { 0.277777777777777777777777777779E0 , 0.444444444444444444444444444444E0 , 0.277777777777777777777777777779E0 }; - -/////////////////////////////////////////////////////////////////////////////////////// -// BEGIN USER-CONFIGURABLE 
PARAMETERS -/////////////////////////////////////////////////////////////////////////////////////// -//The x-direction length is twice as long as the z-direction length -//So, you'll want to have nx_glob be twice as large as nz_glob -int constexpr nx_glob = _NX; //Number of total cells in the x-direction -int constexpr nz_glob = _NZ; //Number of total cells in the z-direction -double constexpr sim_time = _SIM_TIME; //How many seconds to run the simulation -double constexpr output_freq = _OUT_FREQ; //How frequently to output data to file (in seconds) -int constexpr data_spec_int = _DATA_SPEC; //How to initialize the data -double constexpr dx = xlen / nx_glob; // grid spacing in the x-direction -double constexpr dz = zlen / nz_glob; // grid spacing in the x-direction -/////////////////////////////////////////////////////////////////////////////////////// -// END USER-CONFIGURABLE PARAMETERS -/////////////////////////////////////////////////////////////////////////////////////// - -/////////////////////////////////////////////////////////////////////////////////////// -// Variables that are initialized but remain static over the course of the simulation -/////////////////////////////////////////////////////////////////////////////////////// -double dt; //Model time step (seconds) -int nx, nz; //Number of local grid cells in the x- and z- dimensions for this MPI task -int i_beg, k_beg; //beginning index in the x- and z-directions for this MPI task -int nranks, myrank; //Number of MPI ranks and my rank id -int left_rank, right_rank; //MPI Rank IDs that exist to my left and right in the global domain -int mainproc; //Am I the main process (rank == 0)? -double *hy_dens_cell; //hydrostatic density (vert cell avgs). Dimensions: (1-hs:nz+hs) -double *hy_dens_theta_cell; //hydrostatic rho*t (vert cell avgs). Dimensions: (1-hs:nz+hs) -double *hy_dens_int; //hydrostatic density (vert cell interf). Dimensions: (1:nz+1) -double *hy_dens_theta_int; //hydrostatic rho*t (vert cell interf). Dimensions: (1:nz+1) -double *hy_pressure_int; //hydrostatic press (vert cell interf). Dimensions: (1:nz+1) - -/////////////////////////////////////////////////////////////////////////////////////// -// Variables that are dynamics over the course of the simulation -/////////////////////////////////////////////////////////////////////////////////////// -double etime; //Elapsed model time -double output_counter; //Helps determine when it's time to do output -//Runtime variable arrays -double *state; //Fluid state. Dimensions: (1-hs:nx+hs,1-hs:nz+hs,NUM_VARS) -double *state_tmp; //Fluid state. Dimensions: (1-hs:nx+hs,1-hs:nz+hs,NUM_VARS) -double *flux; //Cell interface fluxes. Dimensions: (nx+1,nz+1,NUM_VARS) -double *tend; //Fluid state tendencies. Dimensions: (nx,nz,NUM_VARS) -double *sendbuf_l; //Buffer to send data to the left MPI rank -double *sendbuf_r; //Buffer to send data to the right MPI rank -double *recvbuf_l; //Buffer to receive data from the left MPI rank -double *recvbuf_r; //Buffer to receive data from the right MPI rank -int num_out = 0; //The number of outputs performed so far -int direction_switch = 1; -double mass0, te0; //Initial domain totals for mass and total energy -double mass , te ; //Domain totals for mass and total energy - -//How is this not in the standard?! 
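The dmin() helper defined next predates reaching for the standard library; an equivalent one-liner does exist in <cmath>/<algorithm>. Shown only as a sketch (the dmin_std name is a placeholder, and the patch keeps the hand-rolled helper):

#include <algorithm>   // std::min
#include <cmath>       // std::fmin

// Sketch only: std::fmin(a,b) returns the smaller value and tolerates a NaN
// operand; std::min(a,b) is the plain comparison, used exactly like dmin().
double dmin_std(double a, double b) { return std::fmin(a, b); }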
-double dmin( double a , double b ) { if (a= 0) output(state,etime); - - //////////////////////////////////////////////////// - // MAIN TIME STEP LOOP - //////////////////////////////////////////////////// - auto t1 = std::chrono::steady_clock::now(); - while (etime < sim_time) { - //If the time step leads to exceeding the simulation time, shorten it for the last step - if (etime + dt > sim_time) { dt = sim_time - etime; } - //Perform a single time step - perform_timestep(state,state_tmp,flux,tend,dt); - //Inform the user -#ifndef NO_INFORM - if (mainproc) { printf( "Elapsed Time: %lf / %lf\n", etime , sim_time ); } -#endif - //Update the elapsed time and output counter - etime = etime + dt; - output_counter = output_counter + dt; - //If it's time for output, reset the counter, and do output - if (output_freq >= 0 && output_counter >= output_freq) { - output_counter = output_counter - output_freq; - output(state,etime); - } - } - auto t2 = std::chrono::steady_clock::now(); - if (mainproc) { - std::cout << "CPU Time: " << std::chrono::duration(t2-t1).count() << " sec\n"; - } - - //Final reductions for mass, kinetic energy, and total energy - reductions(mass,te); - - if (mainproc) { - printf( "d_mass: %le\n" , (mass - mass0)/mass0 ); - printf( "d_te: %le\n" , (te - te0 )/te0 ); - } - - finalize(); -} - - -//Performs a single dimensionally split time step using a simple low-storage three-stage Runge-Kutta time integrator -//The dimensional splitting is a second-order-accurate alternating Strang splitting in which the -//order of directions is alternated each time step. -//The Runge-Kutta method used here is defined as follows: -// q* = q[n] + dt/3 * rhs(q[n]) -// q** = q[n] + dt/2 * rhs(q* ) -// q[n+1] = q[n] + dt/1 * rhs(q** ) -void perform_timestep( double *state , double *state_tmp , double *flux , double *tend , double dt ) { - if (direction_switch) { - //x-direction first - semi_discrete_step( state , state , state_tmp , dt / 3 , DIR_X , flux , tend ); - semi_discrete_step( state , state_tmp , state_tmp , dt / 2 , DIR_X , flux , tend ); - semi_discrete_step( state , state_tmp , state , dt / 1 , DIR_X , flux , tend ); - //z-direction second - semi_discrete_step( state , state , state_tmp , dt / 3 , DIR_Z , flux , tend ); - semi_discrete_step( state , state_tmp , state_tmp , dt / 2 , DIR_Z , flux , tend ); - semi_discrete_step( state , state_tmp , state , dt / 1 , DIR_Z , flux , tend ); - } else { - //z-direction second - semi_discrete_step( state , state , state_tmp , dt / 3 , DIR_Z , flux , tend ); - semi_discrete_step( state , state_tmp , state_tmp , dt / 2 , DIR_Z , flux , tend ); - semi_discrete_step( state , state_tmp , state , dt / 1 , DIR_Z , flux , tend ); - //x-direction first - semi_discrete_step( state , state , state_tmp , dt / 3 , DIR_X , flux , tend ); - semi_discrete_step( state , state_tmp , state_tmp , dt / 2 , DIR_X , flux , tend ); - semi_discrete_step( state , state_tmp , state , dt / 1 , DIR_X , flux , tend ); - } - if (direction_switch) { direction_switch = 0; } else { direction_switch = 1; } -} - - -//Perform a single semi-discretized step in time with the form: -//state_out = state_init + dt * rhs(state_forcing) -//Meaning the step starts from state_init, computes the rhs using state_forcing, and stores the result in state_out -void semi_discrete_step( double *state_init , double *state_forcing , double *state_out , double dt , int dir , double *flux , double *tend ) { - int i, k, ll, inds, indt, indw; - double x, z, wpert, dist, x0, z0, xrad, zrad, amp; - if (dir 
== DIR_X) { - //Set the halo values for this MPI task's fluid state in the x-direction - set_halo_values_x(state_forcing); - //Compute the time tendencies for the fluid state in the x-direction - compute_tendencies_x(state_forcing,flux,tend,dt); - } else if (dir == DIR_Z) { - //Set the halo values for this MPI task's fluid state in the z-direction - set_halo_values_z(state_forcing); - //Compute the time tendencies for the fluid state in the z-direction - compute_tendencies_z(state_forcing,flux,tend,dt); - } - - ///////////////////////////////////////////////// - // TODO: THREAD ME - ///////////////////////////////////////////////// - //Apply the tendencies to the fluid state - for (ll=0; ll , Oak Ridge National Laboratory -// This code simulates dry, stratified, compressible, non-hydrostatic fluid flows -// For documentation, please see the attached documentation in the "documentation" folder -// -////////////////////////////////////////////////////////////////////////////////////////// - -#include -#include -#include -#include -#include -#include -#include "pnetcdf.h" -#include - -constexpr double pi = 3.14159265358979323846264338327; //Pi -constexpr double grav = 9.8; //Gravitational acceleration (m / s^2) -constexpr double cp = 1004.; //Specific heat of dry air at constant pressure -constexpr double cv = 717.; //Specific heat of dry air at constant volume -constexpr double rd = 287.; //Dry air constant for equation of state (P=rho*rd*T) -constexpr double p0 = 1.e5; //Standard pressure at the surface in Pascals -constexpr double C0 = 27.5629410929725921310572974482; //Constant to translate potential temperature into pressure (P=C0*(rho*theta)**gamma) -constexpr double gamm = 1.40027894002789400278940027894; //gamma=cp/Rd , have to call this gamm because "gamma" is taken (I hate C so much) -//Define domain and stability-related constants -constexpr double xlen = 2.e4; //Length of the domain in the x-direction (meters) -constexpr double zlen = 1.e4; //Length of the domain in the z-direction (meters) -constexpr double hv_beta = 0.05; //How strong to diffuse the solution: hv_beta \in [0:1] -constexpr double cfl = 1.50; //"Courant, Friedrichs, Lewy" number (for numerical stability) -constexpr double max_speed = 450; //Assumed maximum wave speed during the simulation (speed of sound + speed of wind) (meter / sec) -constexpr int hs = 2; //"Halo" size: number of cells beyond the MPI tasks's domain needed for a full "stencil" of information for reconstruction -constexpr int sten_size = 4; //Size of the stencil used for interpolation - -//Parameters for indexing and flags -constexpr int NUM_VARS = 4; //Number of fluid state variables -constexpr int ID_DENS = 0; //index for density ("rho") -constexpr int ID_UMOM = 1; //index for momentum in the x-direction ("rho * u") -constexpr int ID_WMOM = 2; //index for momentum in the z-direction ("rho * w") -constexpr int ID_RHOT = 3; //index for density * potential temperature ("rho * theta") -constexpr int DIR_X = 1; //Integer constant to express that this operation is in the x-direction -constexpr int DIR_Z = 2; //Integer constant to express that this operation is in the z-direction -constexpr int DATA_SPEC_COLLISION = 1; -constexpr int DATA_SPEC_THERMAL = 2; -constexpr int DATA_SPEC_GRAVITY_WAVES = 3; -constexpr int DATA_SPEC_DENSITY_CURRENT = 5; -constexpr int DATA_SPEC_INJECTION = 6; - -constexpr int nqpoints = 3; -constexpr double qpoints [] = { 0.112701665379258311482073460022E0 , 0.500000000000000000000000000000E0 , 0.887298334620741688517926539980E0 
}; -constexpr double qweights[] = { 0.277777777777777777777777777779E0 , 0.444444444444444444444444444444E0 , 0.277777777777777777777777777779E0 }; - -/////////////////////////////////////////////////////////////////////////////////////// -// BEGIN USER-CONFIGURABLE PARAMETERS -/////////////////////////////////////////////////////////////////////////////////////// -//The x-direction length is twice as long as the z-direction length -//So, you'll want to have nx_glob be twice as large as nz_glob -int constexpr nx_glob = _NX; //Number of total cells in the x-direction -int constexpr nz_glob = _NZ; //Number of total cells in the z-direction -double constexpr sim_time = _SIM_TIME; //How many seconds to run the simulation -double constexpr output_freq = _OUT_FREQ; //How frequently to output data to file (in seconds) -int constexpr data_spec_int = _DATA_SPEC; //How to initialize the data -double constexpr dx = xlen / nx_glob; // grid spacing in the x-direction -double constexpr dz = zlen / nz_glob; // grid spacing in the x-direction -/////////////////////////////////////////////////////////////////////////////////////// -// END USER-CONFIGURABLE PARAMETERS -/////////////////////////////////////////////////////////////////////////////////////// - -/////////////////////////////////////////////////////////////////////////////////////// -// Variables that are initialized but remain static over the course of the simulation -/////////////////////////////////////////////////////////////////////////////////////// -double dt; //Model time step (seconds) -int nx, nz; //Number of local grid cells in the x- and z- dimensions for this MPI task -int i_beg, k_beg; //beginning index in the x- and z-directions for this MPI task -int nranks, myrank; //Number of MPI ranks and my rank id -int left_rank, right_rank; //MPI Rank IDs that exist to my left and right in the global domain -int mainproc; //Am I the main process (rank == 0)? -double *hy_dens_cell; //hydrostatic density (vert cell avgs). Dimensions: (1-hs:nz+hs) -double *hy_dens_theta_cell; //hydrostatic rho*t (vert cell avgs). Dimensions: (1-hs:nz+hs) -double *hy_dens_int; //hydrostatic density (vert cell interf). Dimensions: (1:nz+1) -double *hy_dens_theta_int; //hydrostatic rho*t (vert cell interf). Dimensions: (1:nz+1) -double *hy_pressure_int; //hydrostatic press (vert cell interf). Dimensions: (1:nz+1) - -/////////////////////////////////////////////////////////////////////////////////////// -// Variables that are dynamics over the course of the simulation -/////////////////////////////////////////////////////////////////////////////////////// -double etime; //Elapsed model time -double output_counter; //Helps determine when it's time to do output -//Runtime variable arrays -double *state; //Fluid state. Dimensions: (1-hs:nx+hs,1-hs:nz+hs,NUM_VARS) -double *state_tmp; //Fluid state. Dimensions: (1-hs:nx+hs,1-hs:nz+hs,NUM_VARS) -double *flux; //Cell interface fluxes. Dimensions: (nx+1,nz+1,NUM_VARS) -double *tend; //Fluid state tendencies. Dimensions: (nx,nz,NUM_VARS) -int num_out = 0; //The number of outputs performed so far -int direction_switch = 1; -double mass0, te0; //Initial domain totals for mass and total energy -double mass , te ; //Domain totals for mass and total energy - -//How is this not in the standard?! 
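The state/state_tmp/flux/tend arrays declared above are flat double* buffers; their "Dimensions:" comments imply variable-major storage with hs halo cells in z and x. The formula below is a sketch of that implied layout, not code lifted from this patch:

// Sketch of the flat indexing implied by the "Dimensions" comments above:
// the variable index ll is slowest, then z (with hs halo cells), then x.
// hs, ID_DENS, nx, nz are the globals declared in this file.
inline int state_index(int ll, int k, int i, int nx, int nz) {
  return ll * (nz + 2*hs) * (nx + 2*hs) + (k + hs) * (nx + 2*hs) + (i + hs);
}
// e.g. density in interior cell (k,i): state[state_index(ID_DENS,k,i,nx,nz)]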
-double dmin( double a , double b ) { if (a sim_time) { dt = sim_time - etime; } - //Perform a single time step - perform_timestep(state,state_tmp,flux,tend,dt); - //Inform the user -#ifndef NO_INFORM - if (mainproc) { printf( "Elapsed Time: %lf / %lf\n", etime , sim_time ); } -#endif - //Update the elapsed time and output counter - etime = etime + dt; - output_counter = output_counter + dt; - //If it's time for output, reset the counter, and do output - if (output_counter >= output_freq) { - output_counter = output_counter - output_freq; - output(state,etime); - } - } - auto t2 = std::chrono::steady_clock::now(); - if (mainproc) { - std::cout << "CPU Time: " << std::chrono::duration(t2-t1).count() << " sec\n"; - } - - //Final reductions for mass, kinetic energy, and total energy - reductions(mass,te); - - if (mainproc) { - printf( "d_mass: %le\n" , (mass - mass0)/mass0 ); - printf( "d_te: %le\n" , (te - te0 )/te0 ); - } - - finalize(); -} - - -//Performs a single dimensionally split time step using a simple low-storage three-stage Runge-Kutta time integrator -//The dimensional splitting is a second-order-accurate alternating Strang splitting in which the -//order of directions is alternated each time step. -//The Runge-Kutta method used here is defined as follows: -// q* = q[n] + dt/3 * rhs(q[n]) -// q** = q[n] + dt/2 * rhs(q* ) -// q[n+1] = q[n] + dt/1 * rhs(q** ) -void perform_timestep( double *state , double *state_tmp , double *flux , double *tend , double dt ) { - if (direction_switch) { - //x-direction first - semi_discrete_step( state , state , state_tmp , dt / 3 , DIR_X , flux , tend ); - semi_discrete_step( state , state_tmp , state_tmp , dt / 2 , DIR_X , flux , tend ); - semi_discrete_step( state , state_tmp , state , dt / 1 , DIR_X , flux , tend ); - //z-direction second - semi_discrete_step( state , state , state_tmp , dt / 3 , DIR_Z , flux , tend ); - semi_discrete_step( state , state_tmp , state_tmp , dt / 2 , DIR_Z , flux , tend ); - semi_discrete_step( state , state_tmp , state , dt / 1 , DIR_Z , flux , tend ); - } else { - //z-direction second - semi_discrete_step( state , state , state_tmp , dt / 3 , DIR_Z , flux , tend ); - semi_discrete_step( state , state_tmp , state_tmp , dt / 2 , DIR_Z , flux , tend ); - semi_discrete_step( state , state_tmp , state , dt / 1 , DIR_Z , flux , tend ); - //x-direction first - semi_discrete_step( state , state , state_tmp , dt / 3 , DIR_X , flux , tend ); - semi_discrete_step( state , state_tmp , state_tmp , dt / 2 , DIR_X , flux , tend ); - semi_discrete_step( state , state_tmp , state , dt / 1 , DIR_X , flux , tend ); - } - if (direction_switch) { direction_switch = 0; } else { direction_switch = 1; } -} - - -//Perform a single semi-discretized step in time with the form: -//state_out = state_init + dt * rhs(state_forcing) -//Meaning the step starts from state_init, computes the rhs using state_forcing, and stores the result in state_out -void semi_discrete_step( double *state_init , double *state_forcing , double *state_out , double dt , int dir , double *flux , double *tend ) { - int i, k, ll, inds, indt, indw; - double x, z, wpert, dist, x0, z0, xrad, zrad, amp; - if (dir == DIR_X) { - //Set the halo values for this MPI task's fluid state in the x-direction - set_halo_values_x(state_forcing); - //Compute the time tendencies for the fluid state in the x-direction - compute_tendencies_x(state_forcing,flux,tend,dt); - } else if (dir == DIR_Z) { - //Set the halo values for this MPI task's fluid state in the z-direction - 
set_halo_values_z(state_forcing); - //Compute the time tendencies for the fluid state in the z-direction - compute_tendencies_z(state_forcing,flux,tend,dt); - } - - ///////////////////////////////////////////////// - // TODO: THREAD ME - ///////////////////////////////////////////////// - //Apply the tendencies to the fluid state - for (ll=0; ll +#include #include -#include #include -#include "const.h" +#include +#include #include "pnetcdf.h" #include -// We're going to define all arrays on the host because this doesn't use parallel_for -typedef yakl::Array real1d; -typedef yakl::Array real2d; -typedef yakl::Array real3d; -typedef yakl::Array doub1d; -typedef yakl::Array doub2d; -typedef yakl::Array doub3d; - -typedef yakl::Array realConst1d; -typedef yakl::Array realConst2d; -typedef yakl::Array realConst3d; -typedef yakl::Array doubConst1d; -typedef yakl::Array doubConst2d; -typedef yakl::Array doubConst3d; +constexpr double pi = 3.14159265358979323846264338327; //Pi +constexpr double grav = 9.8; //Gravitational acceleration (m / s^2) +constexpr double cp = 1004.; //Specific heat of dry air at constant pressure +constexpr double cv = 717.; //Specific heat of dry air at constant volume +constexpr double rd = 287.; //Dry air constant for equation of state (P=rho*rd*T) +constexpr double p0 = 1.e5; //Standard pressure at the surface in Pascals +constexpr double C0 = 27.5629410929725921310572974482; //Constant to translate potential temperature into pressure (P=C0*(rho*theta)**gamma) +constexpr double gamm = 1.40027894002789400278940027894; //gamma=cp/Rd , have to call this gamm because "gamma" is taken (I hate C so much) + +//Define domain and stability-related constants +constexpr double xlen = 2.e4; //Length of the domain in the x-direction (meters) +constexpr double zlen = 1.e4; //Length of the domain in the z-direction (meters) +constexpr double hv_beta = 0.05; //How strong to diffuse the solution: hv_beta \in [0:1] +constexpr double cfl = 1.50; //"Courant, Friedrichs, Lewy" number (for numerical stability) +constexpr double max_speed = 450; //Assumed maximum wave speed during the simulation (speed of sound + speed of wind) (meter / sec) +constexpr int hs = 2; //"Halo" size: number of cells beyond the MPI tasks's domain needed for a full "stencil" of information for reconstruction +constexpr int sten_size = 4; //Size of the stencil used for interpolation + +//Parameters for indexing and flags +constexpr int NUM_VARS = 4; //Number of fluid state variables +constexpr int ID_DENS = 0; //index for density ("rho") +constexpr int ID_UMOM = 1; //index for momentum in the x-direction ("rho * u") +constexpr int ID_WMOM = 2; //index for momentum in the z-direction ("rho * w") +constexpr int ID_RHOT = 3; //index for density * potential temperature ("rho * theta") +constexpr int DIR_X = 1; //Integer constant to express that this operation is in the x-direction +constexpr int DIR_Z = 2; //Integer constant to express that this operation is in the z-direction +constexpr int DATA_SPEC_COLLISION = 1; +constexpr int DATA_SPEC_THERMAL = 2; +constexpr int DATA_SPEC_GRAVITY_WAVES = 3; +constexpr int DATA_SPEC_DENSITY_CURRENT = 5; +constexpr int DATA_SPEC_INJECTION = 6; + +constexpr int nqpoints = 3; +constexpr double qpoints [] = { 0.112701665379258311482073460022E0 , 0.500000000000000000000000000000E0 , 0.887298334620741688517926539980E0 }; +constexpr double qweights[] = { 0.277777777777777777777777777779E0 , 0.444444444444444444444444444444E0 , 0.277777777777777777777777777779E0 }; + 
+/////////////////////////////////////////////////////////////////////////////////////// +// BEGIN USER-CONFIGURABLE PARAMETERS +/////////////////////////////////////////////////////////////////////////////////////// +//The x-direction length is twice as long as the z-direction length +//So, you'll want to have nx_glob be twice as large as nz_glob +int constexpr nx_glob = _NX; //Number of total cells in the x-direction +int constexpr nz_glob = _NZ; //Number of total cells in the z-direction +double constexpr sim_time = _SIM_TIME; //How many seconds to run the simulation +double constexpr output_freq = _OUT_FREQ; //How frequently to output data to file (in seconds) +int constexpr data_spec_int = _DATA_SPEC; //How to initialize the data +double constexpr dx = xlen / nx_glob; // grid spacing in the x-direction +double constexpr dz = zlen / nz_glob; // grid spacing in the x-direction +/////////////////////////////////////////////////////////////////////////////////////// +// END USER-CONFIGURABLE PARAMETERS +/////////////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////////////// // Variables that are initialized but remain static over the course of the simulation /////////////////////////////////////////////////////////////////////////////////////// -struct Fixed_data { - int nx, nz; //Number of local grid cells in the x- and z- dimensions for this MPI task - int i_beg, k_beg; //beginning index in the x- and z-directions for this MPI task - int nranks, myrank; //Number of MPI ranks and my rank id - int left_rank, right_rank; //MPI Rank IDs that exist to my left and right in the global domain - int mainproc; //Am I the main process (rank == 0)? - realConst1d hy_dens_cell; //hydrostatic density (vert cell avgs). Dimensions: (1-hs:nz+hs) - realConst1d hy_dens_theta_cell; //hydrostatic rho*t (vert cell avgs). Dimensions: (1-hs:nz+hs) - realConst1d hy_dens_int; //hydrostatic density (vert cell interf). Dimensions: (1:nz+1) - realConst1d hy_dens_theta_int; //hydrostatic rho*t (vert cell interf). Dimensions: (1:nz+1) - realConst1d hy_pressure_int; //hydrostatic press (vert cell interf). Dimensions: (1:nz+1) -}; +double dt; //Model time step (seconds) +int nx, nz; //Number of local grid cells in the x- and z- dimensions for this MPI task +int i_beg, k_beg; //beginning index in the x- and z-directions for this MPI task +int nranks, myrank; //Number of MPI ranks and my rank id +int left_rank, right_rank; //MPI Rank IDs that exist to my left and right in the global domain +int mainproc; //Am I the main process (rank == 0)? +double *hy_dens_cell; //hydrostatic density (vert cell avgs). Dimensions: (1-hs:nz+hs) +double *hy_dens_theta_cell; //hydrostatic rho*t (vert cell avgs). Dimensions: (1-hs:nz+hs) +double *hy_dens_int; //hydrostatic density (vert cell interf). Dimensions: (1:nz+1) +double *hy_dens_theta_int; //hydrostatic rho*t (vert cell interf). Dimensions: (1:nz+1) +double *hy_pressure_int; //hydrostatic press (vert cell interf). Dimensions: (1:nz+1) + +/////////////////////////////////////////////////////////////////////////////////////// +// Variables that are dynamics over the course of the simulation +/////////////////////////////////////////////////////////////////////////////////////// +double etime; //Elapsed model time +double output_counter; //Helps determine when it's time to do output +//Runtime variable arrays +double *state; //Fluid state. 
Dimensions: (1-hs:nx+hs,1-hs:nz+hs,NUM_VARS) +double *state_tmp; //Fluid state. Dimensions: (1-hs:nx+hs,1-hs:nz+hs,NUM_VARS) +double *flux; //Cell interface fluxes. Dimensions: (nx+1,nz+1,NUM_VARS) +double *tend; //Fluid state tendencies. Dimensions: (nx,nz,NUM_VARS) +double *sendbuf_l; //Buffer to send data to the left MPI rank +double *sendbuf_r; //Buffer to send data to the right MPI rank +double *recvbuf_l; //Buffer to receive data from the left MPI rank +double *recvbuf_r; //Buffer to receive data from the right MPI rank +int num_out = 0; //The number of outputs performed so far +int direction_switch = 1; +double mass0, te0; //Initial domain totals for mass and total energy +double mass , te ; //Domain totals for mass and total energy + +//How is this not in the standard?! +double dmin( double a , double b ) { if (a= 0) { - output(state,etime,num_out,fixed_data); - } - - int direction_switch = 1; // Tells dimensionally split which order to take x,z solves - - //////////////////////////////////////////////////// - // MAIN TIME STEP LOOP - //////////////////////////////////////////////////// - auto t1 = std::chrono::steady_clock::now(); - while (etime < sim_time) { - //If the time step leads to exceeding the simulation time, shorten it for the last step - if (etime + dt > sim_time) { dt = sim_time - etime; } - //Perform a single time step - perform_timestep(state,dt,direction_switch,fixed_data); - //Inform the user - #ifndef NO_INFORM - if (mainproc) { printf( "Elapsed Time: %lf / %lf\n", etime , sim_time ); } - #endif - //Update the elapsed time and output counter - etime = etime + dt; - output_counter = output_counter + dt; - //If it's time for output, reset the counter, and do output - if (output_freq >= 0 && output_counter >= output_freq) { - output_counter = output_counter - output_freq; - output(state,etime,num_out,fixed_data); - } - } - auto t2 = std::chrono::steady_clock::now(); - if (mainproc) { - std::cout << "CPU Time: " << std::chrono::duration(t2-t1).count() << " sec\n"; + init( &argc , &argv ); + + //Initial reductions for mass, kinetic energy, and total energy + reductions(mass0,te0); + + //Output the initial state + if (output_freq >= 0) output(state,etime); + + //////////////////////////////////////////////////// + // MAIN TIME STEP LOOP + //////////////////////////////////////////////////// + auto t1 = std::chrono::steady_clock::now(); + while (etime < sim_time) { + //If the time step leads to exceeding the simulation time, shorten it for the last step + if (etime + dt > sim_time) { dt = sim_time - etime; } + //Perform a single time step + perform_timestep(state,state_tmp,flux,tend,dt); + //Inform the user +#ifndef NO_INFORM + if (mainproc) { printf( "Elapsed Time: %lf / %lf\n", etime , sim_time ); } +#endif + //Update the elapsed time and output counter + etime = etime + dt; + output_counter = output_counter + dt; + //If it's time for output, reset the counter, and do output + if (output_freq >= 0 && output_counter >= output_freq) { + output_counter = output_counter - output_freq; + output(state,etime); } + } + auto t2 = std::chrono::steady_clock::now(); + if (mainproc) { + std::cout << "CPU Time: " << std::chrono::duration(t2-t1).count() << " sec\n"; + } - //Final reductions for mass, kinetic energy, and total energy - double mass, te; - reductions(state,mass,te,fixed_data); + //Final reductions for mass, kinetic energy, and total energy + reductions(mass,te); - if (mainproc) { - printf( "d_mass: %le\n" , (mass - mass0)/mass0 ); - printf( "d_te: %le\n" , (te - te0 
)/te0 ); - } - - finalize(); + if (mainproc) { + printf( "d_mass: %le\n" , (mass - mass0)/mass0 ); + printf( "d_te: %le\n" , (te - te0 )/te0 ); } - yakl::finalize(); - MPI_Finalize(); + + finalize(); } @@ -146,33 +184,28 @@ int main(int argc, char **argv) { //The dimensional splitting is a second-order-accurate alternating Strang splitting in which the //order of directions is alternated each time step. //The Runge-Kutta method used here is defined as follows: -// q* = q_n + dt/3 * rhs(q_n) -// q** = q_n + dt/2 * rhs(q* ) -// q_n+1 = q_n + dt/1 * rhs(q**) -void perform_timestep( real3d const &state , real dt , int &direction_switch , Fixed_data const &fixed_data ) { - auto &nx = fixed_data.nx ; - auto &nz = fixed_data.nz ; - - real3d state_tmp("state_tmp",NUM_VARS,nz+2*hs,nx+2*hs); - +// q* = q[n] + dt/3 * rhs(q[n]) +// q** = q[n] + dt/2 * rhs(q* ) +// q[n+1] = q[n] + dt/1 * rhs(q** ) +void perform_timestep( double *state , double *state_tmp , double *flux , double *tend , double dt ) { if (direction_switch) { //x-direction first - semi_discrete_step( state , state , state_tmp , dt / 3 , DIR_X , fixed_data ); - semi_discrete_step( state , state_tmp , state_tmp , dt / 2 , DIR_X , fixed_data ); - semi_discrete_step( state , state_tmp , state , dt / 1 , DIR_X , fixed_data ); + semi_discrete_step( state , state , state_tmp , dt / 3 , DIR_X , flux , tend ); + semi_discrete_step( state , state_tmp , state_tmp , dt / 2 , DIR_X , flux , tend ); + semi_discrete_step( state , state_tmp , state , dt / 1 , DIR_X , flux , tend ); //z-direction second - semi_discrete_step( state , state , state_tmp , dt / 3 , DIR_Z , fixed_data ); - semi_discrete_step( state , state_tmp , state_tmp , dt / 2 , DIR_Z , fixed_data ); - semi_discrete_step( state , state_tmp , state , dt / 1 , DIR_Z , fixed_data ); + semi_discrete_step( state , state , state_tmp , dt / 3 , DIR_Z , flux , tend ); + semi_discrete_step( state , state_tmp , state_tmp , dt / 2 , DIR_Z , flux , tend ); + semi_discrete_step( state , state_tmp , state , dt / 1 , DIR_Z , flux , tend ); } else { //z-direction second - semi_discrete_step( state , state , state_tmp , dt / 3 , DIR_Z , fixed_data ); - semi_discrete_step( state , state_tmp , state_tmp , dt / 2 , DIR_Z , fixed_data ); - semi_discrete_step( state , state_tmp , state , dt / 1 , DIR_Z , fixed_data ); + semi_discrete_step( state , state , state_tmp , dt / 3 , DIR_Z , flux , tend ); + semi_discrete_step( state , state_tmp , state_tmp , dt / 2 , DIR_Z , flux , tend ); + semi_discrete_step( state , state_tmp , state , dt / 1 , DIR_Z , flux , tend ); //x-direction first - semi_discrete_step( state , state , state_tmp , dt / 3 , DIR_X , fixed_data ); - semi_discrete_step( state , state_tmp , state_tmp , dt / 2 , DIR_X , fixed_data ); - semi_discrete_step( state , state_tmp , state , dt / 1 , DIR_X , fixed_data ); + semi_discrete_step( state , state , state_tmp , dt / 3 , DIR_X , flux , tend ); + semi_discrete_step( state , state_tmp , state_tmp , dt / 2 , DIR_X , flux , tend ); + semi_discrete_step( state , state_tmp , state , dt / 1 , DIR_X , flux , tend ); } if (direction_switch) { direction_switch = 0; } else { direction_switch = 1; } } @@ -181,54 +214,58 @@ void perform_timestep( real3d const &state , real dt , int &direction_switch , F //Perform a single semi-discretized step in time with the form: //state_out = state_init + dt * rhs(state_forcing) //Meaning the step starts from state_init, computes the rhs using state_forcing, and stores the result in state_out -void semi_discrete_step( 
realConst3d state_init , real3d const &state_forcing , real3d const &state_out , real dt , int dir , Fixed_data const &fixed_data ) { - auto &nx = fixed_data.nx ; - auto &nz = fixed_data.nz ; - auto &i_beg = fixed_data.i_beg ; - auto &k_beg = fixed_data.k_beg ; - auto &hy_dens_cell = fixed_data.hy_dens_cell ; - - real3d tend("tend",NUM_VARS,nz,nx); - +void semi_discrete_step( double *state_init , double *state_forcing , double *state_out , double dt , int dir , double *flux , double *tend ) { + int i, k, ll, inds, indt, indw; + double x, z, wpert, dist, x0, z0, xrad, zrad, amp; if (dir == DIR_X) { //Set the halo values for this MPI task's fluid state in the x-direction - yakl::timer_start("halo x"); - set_halo_values_x(state_forcing,fixed_data); - yakl::timer_stop("halo x"); + set_halo_values_x(state_forcing); //Compute the time tendencies for the fluid state in the x-direction - yakl::timer_start("tendencies x"); - compute_tendencies_x(state_forcing,tend,dt,fixed_data); - yakl::timer_stop("tendencies x"); + compute_tendencies_x(state_forcing,flux,tend,dt); } else if (dir == DIR_Z) { //Set the halo values for this MPI task's fluid state in the z-direction - yakl::timer_start("halo z"); - set_halo_values_z(state_forcing,fixed_data); - yakl::timer_stop("halo z"); + set_halo_values_z(state_forcing); //Compute the time tendencies for the fluid state in the z-direction - yakl::timer_start("tendencies z"); - compute_tendencies_z(state_forcing,tend,dt,fixed_data); - yakl::timer_stop("tendencies z"); + compute_tendencies_z(state_forcing,flux,tend,dt); } ///////////////////////////////////////////////// - // TODO: MAKE THESE 3 LOOPS A PARALLEL_FOR + // TODO: THREAD ME ///////////////////////////////////////////////// //Apply the tendencies to the fluid state - yakl::timer_start("apply tendencies"); - for (int ll=0; ll stencil; - SArray d3_vals; - SArray vals; + for (k=0; k stencil; - SArray d3_vals; - SArray vals; + for (k=0; k qpoints; - SArray qweights; - - qpoints(0) = 0.112701665379258311482073460022; - qpoints(1) = 0.500000000000000000000000000000; - qpoints(2) = 0.887298334620741688517926539980; - - qweights(0) = 0.277777777777777777777777777779; - qweights(1) = 0.444444444444444444444444444444; - qweights(2) = 0.277777777777777777777777777779; - ////////////////////////////////////////////////////////////////////////// // Initialize the cell-averaged fluid state via Gauss-Legendre quadrature ////////////////////////////////////////////////////////////////////////// - ///////////////////////////////////////////////// - // TODO: MAKE THESE 2 LOOPS A PARALLEL_FOR - ///////////////////////////////////////////////// - for (int k=0; k +#include #include -#include #include -#include "const.h" +#include +#include #include "pnetcdf.h" #include -// We're going to define all arrays on the host because this doesn't use parallel_for -typedef yakl::Array real1d; -typedef yakl::Array real2d; -typedef yakl::Array real3d; -typedef yakl::Array doub1d; -typedef yakl::Array doub2d; -typedef yakl::Array doub3d; - -typedef yakl::Array realConst1d; -typedef yakl::Array realConst2d; -typedef yakl::Array realConst3d; -typedef yakl::Array doubConst1d; -typedef yakl::Array doubConst2d; -typedef yakl::Array doubConst3d; +constexpr double pi = 3.14159265358979323846264338327; //Pi +constexpr double grav = 9.8; //Gravitational acceleration (m / s^2) +constexpr double cp = 1004.; //Specific heat of dry air at constant pressure +constexpr double cv = 717.; //Specific heat of dry air at constant volume +constexpr double rd 
= 287.; //Dry air constant for equation of state (P=rho*rd*T) +constexpr double p0 = 1.e5; //Standard pressure at the surface in Pascals +constexpr double C0 = 27.5629410929725921310572974482; //Constant to translate potential temperature into pressure (P=C0*(rho*theta)**gamma) +constexpr double gamm = 1.40027894002789400278940027894; //gamma=cp/Rd , have to call this gamm because "gamma" is taken (I hate C so much) +//Define domain and stability-related constants +constexpr double xlen = 2.e4; //Length of the domain in the x-direction (meters) +constexpr double zlen = 1.e4; //Length of the domain in the z-direction (meters) +constexpr double hv_beta = 0.05; //How strong to diffuse the solution: hv_beta \in [0:1] +constexpr double cfl = 1.50; //"Courant, Friedrichs, Lewy" number (for numerical stability) +constexpr double max_speed = 450; //Assumed maximum wave speed during the simulation (speed of sound + speed of wind) (meter / sec) +constexpr int hs = 2; //"Halo" size: number of cells beyond the MPI tasks's domain needed for a full "stencil" of information for reconstruction +constexpr int sten_size = 4; //Size of the stencil used for interpolation + +//Parameters for indexing and flags +constexpr int NUM_VARS = 4; //Number of fluid state variables +constexpr int ID_DENS = 0; //index for density ("rho") +constexpr int ID_UMOM = 1; //index for momentum in the x-direction ("rho * u") +constexpr int ID_WMOM = 2; //index for momentum in the z-direction ("rho * w") +constexpr int ID_RHOT = 3; //index for density * potential temperature ("rho * theta") +constexpr int DIR_X = 1; //Integer constant to express that this operation is in the x-direction +constexpr int DIR_Z = 2; //Integer constant to express that this operation is in the z-direction +constexpr int DATA_SPEC_COLLISION = 1; +constexpr int DATA_SPEC_THERMAL = 2; +constexpr int DATA_SPEC_GRAVITY_WAVES = 3; +constexpr int DATA_SPEC_DENSITY_CURRENT = 5; +constexpr int DATA_SPEC_INJECTION = 6; + +constexpr int nqpoints = 3; +constexpr double qpoints [] = { 0.112701665379258311482073460022E0 , 0.500000000000000000000000000000E0 , 0.887298334620741688517926539980E0 }; +constexpr double qweights[] = { 0.277777777777777777777777777779E0 , 0.444444444444444444444444444444E0 , 0.277777777777777777777777777779E0 }; + +/////////////////////////////////////////////////////////////////////////////////////// +// BEGIN USER-CONFIGURABLE PARAMETERS +/////////////////////////////////////////////////////////////////////////////////////// +//The x-direction length is twice as long as the z-direction length +//So, you'll want to have nx_glob be twice as large as nz_glob +int constexpr nx_glob = _NX; //Number of total cells in the x-direction +int constexpr nz_glob = _NZ; //Number of total cells in the z-direction +double constexpr sim_time = _SIM_TIME; //How many seconds to run the simulation +double constexpr output_freq = _OUT_FREQ; //How frequently to output data to file (in seconds) +int constexpr data_spec_int = _DATA_SPEC; //How to initialize the data +double constexpr dx = xlen / nx_glob; // grid spacing in the x-direction +double constexpr dz = zlen / nz_glob; // grid spacing in the x-direction +/////////////////////////////////////////////////////////////////////////////////////// +// END USER-CONFIGURABLE PARAMETERS +/////////////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////////////// // Variables that are initialized but remain static 
over the course of the simulation /////////////////////////////////////////////////////////////////////////////////////// -struct Fixed_data { - int nx, nz; //Number of local grid cells in the x- and z- dimensions for this MPI task - int i_beg, k_beg; //beginning index in the x- and z-directions for this MPI task - int nranks, myrank; //Number of MPI ranks and my rank id - int left_rank, right_rank; //MPI Rank IDs that exist to my left and right in the global domain - int mainproc; //Am I the main process (rank == 0)? - realConst1d hy_dens_cell; //hydrostatic density (vert cell avgs). Dimensions: (1-hs:nz+hs) - realConst1d hy_dens_theta_cell; //hydrostatic rho*t (vert cell avgs). Dimensions: (1-hs:nz+hs) - realConst1d hy_dens_int; //hydrostatic density (vert cell interf). Dimensions: (1:nz+1) - realConst1d hy_dens_theta_int; //hydrostatic rho*t (vert cell interf). Dimensions: (1:nz+1) - realConst1d hy_pressure_int; //hydrostatic press (vert cell interf). Dimensions: (1:nz+1) -}; +double dt; //Model time step (seconds) +int nx, nz; //Number of local grid cells in the x- and z- dimensions for this MPI task +int i_beg, k_beg; //beginning index in the x- and z-directions for this MPI task +int nranks, myrank; //Number of MPI ranks and my rank id +int left_rank, right_rank; //MPI Rank IDs that exist to my left and right in the global domain +int mainproc; //Am I the main process (rank == 0)? +double *hy_dens_cell; //hydrostatic density (vert cell avgs). Dimensions: (1-hs:nz+hs) +double *hy_dens_theta_cell; //hydrostatic rho*t (vert cell avgs). Dimensions: (1-hs:nz+hs) +double *hy_dens_int; //hydrostatic density (vert cell interf). Dimensions: (1:nz+1) +double *hy_dens_theta_int; //hydrostatic rho*t (vert cell interf). Dimensions: (1:nz+1) +double *hy_pressure_int; //hydrostatic press (vert cell interf). Dimensions: (1:nz+1) + +/////////////////////////////////////////////////////////////////////////////////////// +// Variables that are dynamics over the course of the simulation +/////////////////////////////////////////////////////////////////////////////////////// +double etime; //Elapsed model time +double output_counter; //Helps determine when it's time to do output +//Runtime variable arrays +double *state; //Fluid state. Dimensions: (1-hs:nx+hs,1-hs:nz+hs,NUM_VARS) +double *state_tmp; //Fluid state. Dimensions: (1-hs:nx+hs,1-hs:nz+hs,NUM_VARS) +double *flux; //Cell interface fluxes. Dimensions: (nx+1,nz+1,NUM_VARS) +double *tend; //Fluid state tendencies. Dimensions: (nx,nz,NUM_VARS) +int num_out = 0; //The number of outputs performed so far +int direction_switch = 1; +double mass0, te0; //Initial domain totals for mass and total energy +double mass , te ; //Domain totals for mass and total energy + +//How is this not in the standard?! 
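This hunk swaps the YAKL real3d/realConst1d arrays for raw double* buffers. If readable (ll,k,i) indexing is still wanted without YAKL, one option is a thin non-owning view over the flat buffer; the View3d name and layout below are an assumption for illustration, not something this patch introduces:

// Assumed illustration only: a minimal non-owning 3-D view over the flat
// buffers this patch introduces, keeping (ll,k,i) indexing readable.
struct View3d {
  double *data;   // points at a flat [n0*n1*n2] buffer
  int n1, n2;     // extents of the two fastest-varying dimensions
  double &operator()(int i0, int i1, int i2) const {
    return data[(i0 * n1 + i1) * n2 + i2];
  }
};
// e.g. View3d st{state, nz + 2*hs, nx + 2*hs};  st(ID_DENS, k+hs, i+hs) = ...;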
+double dmin( double a , double b ) { if (a= 0) { - output(state,etime,num_out,fixed_data); - } - - int direction_switch = 1; // Tells dimensionally split which order to take x,z solves - - //////////////////////////////////////////////////// - // MAIN TIME STEP LOOP - //////////////////////////////////////////////////// - auto t1 = std::chrono::steady_clock::now(); - while (etime < sim_time) { - //If the time step leads to exceeding the simulation time, shorten it for the last step - if (etime + dt > sim_time) { dt = sim_time - etime; } - //Perform a single time step - perform_timestep(state,dt,direction_switch,fixed_data); - //Inform the user - #ifndef NO_INFORM - if (mainproc) { printf( "Elapsed Time: %lf / %lf\n", etime , sim_time ); } - #endif - //Update the elapsed time and output counter - etime = etime + dt; - output_counter = output_counter + dt; - //If it's time for output, reset the counter, and do output - if (output_freq >= 0 && output_counter >= output_freq) { - output_counter = output_counter - output_freq; - output(state,etime,num_out,fixed_data); - } - } - auto t2 = std::chrono::steady_clock::now(); - if (mainproc) { - std::cout << "CPU Time: " << std::chrono::duration(t2-t1).count() << " sec\n"; + //////////////////////////////////////////////////// + // MAIN TIME STEP LOOP + //////////////////////////////////////////////////// + auto t1 = std::chrono::steady_clock::now(); + while (etime < sim_time) { + //If the time step leads to exceeding the simulation time, shorten it for the last step + if (etime + dt > sim_time) { dt = sim_time - etime; } + //Perform a single time step + perform_timestep(state,state_tmp,flux,tend,dt); + //Inform the user +#ifndef NO_INFORM + if (mainproc) { printf( "Elapsed Time: %lf / %lf\n", etime , sim_time ); } +#endif + //Update the elapsed time and output counter + etime = etime + dt; + output_counter = output_counter + dt; + //If it's time for output, reset the counter, and do output + if (output_counter >= output_freq) { + output_counter = output_counter - output_freq; + output(state,etime); } + } + auto t2 = std::chrono::steady_clock::now(); + if (mainproc) { + std::cout << "CPU Time: " << std::chrono::duration(t2-t1).count() << " sec\n"; + } - //Final reductions for mass, kinetic energy, and total energy - double mass, te; - reductions(state,mass,te,fixed_data); - - if (mainproc) { - printf( "d_mass: %le\n" , (mass - mass0)/mass0 ); - printf( "d_te: %le\n" , (te - te0 )/te0 ); - } + //Final reductions for mass, kinetic energy, and total energy + reductions(mass,te); - finalize(); + if (mainproc) { + printf( "d_mass: %le\n" , (mass - mass0)/mass0 ); + printf( "d_te: %le\n" , (te - te0 )/te0 ); } - yakl::finalize(); - MPI_Finalize(); -} + finalize(); +} //Performs a single dimensionally split time step using a simple low-storage three-stage Runge-Kutta time integrator //The dimensional splitting is a second-order-accurate alternating Strang splitting in which the //order of directions is alternated each time step. 
//The Runge-Kutta method used here is defined as follows: -// q* = q_n + dt/3 * rhs(q_n) -// q** = q_n + dt/2 * rhs(q* ) -// q_n+1 = q_n + dt/1 * rhs(q**) -void perform_timestep( real3d const &state , real dt , int &direction_switch , Fixed_data const &fixed_data ) { - auto &nx = fixed_data.nx ; - auto &nz = fixed_data.nz ; - - real3d state_tmp("state_tmp",NUM_VARS,nz+2*hs,nx+2*hs); - +// q* = q[n] + dt/3 * rhs(q[n]) +// q** = q[n] + dt/2 * rhs(q* ) +// q[n+1] = q[n] + dt/1 * rhs(q** ) +void perform_timestep( double *state , double *state_tmp , double *flux , double *tend , double dt ) { if (direction_switch) { //x-direction first - semi_discrete_step( state , state , state_tmp , dt / 3 , DIR_X , fixed_data ); - semi_discrete_step( state , state_tmp , state_tmp , dt / 2 , DIR_X , fixed_data ); - semi_discrete_step( state , state_tmp , state , dt / 1 , DIR_X , fixed_data ); + semi_discrete_step( state , state , state_tmp , dt / 3 , DIR_X , flux , tend ); + semi_discrete_step( state , state_tmp , state_tmp , dt / 2 , DIR_X , flux , tend ); + semi_discrete_step( state , state_tmp , state , dt / 1 , DIR_X , flux , tend ); //z-direction second - semi_discrete_step( state , state , state_tmp , dt / 3 , DIR_Z , fixed_data ); - semi_discrete_step( state , state_tmp , state_tmp , dt / 2 , DIR_Z , fixed_data ); - semi_discrete_step( state , state_tmp , state , dt / 1 , DIR_Z , fixed_data ); + semi_discrete_step( state , state , state_tmp , dt / 3 , DIR_Z , flux , tend ); + semi_discrete_step( state , state_tmp , state_tmp , dt / 2 , DIR_Z , flux , tend ); + semi_discrete_step( state , state_tmp , state , dt / 1 , DIR_Z , flux , tend ); } else { //z-direction second - semi_discrete_step( state , state , state_tmp , dt / 3 , DIR_Z , fixed_data ); - semi_discrete_step( state , state_tmp , state_tmp , dt / 2 , DIR_Z , fixed_data ); - semi_discrete_step( state , state_tmp , state , dt / 1 , DIR_Z , fixed_data ); + semi_discrete_step( state , state , state_tmp , dt / 3 , DIR_Z , flux , tend ); + semi_discrete_step( state , state_tmp , state_tmp , dt / 2 , DIR_Z , flux , tend ); + semi_discrete_step( state , state_tmp , state , dt / 1 , DIR_Z , flux , tend ); //x-direction first - semi_discrete_step( state , state , state_tmp , dt / 3 , DIR_X , fixed_data ); - semi_discrete_step( state , state_tmp , state_tmp , dt / 2 , DIR_X , fixed_data ); - semi_discrete_step( state , state_tmp , state , dt / 1 , DIR_X , fixed_data ); + semi_discrete_step( state , state , state_tmp , dt / 3 , DIR_X , flux , tend ); + semi_discrete_step( state , state_tmp , state_tmp , dt / 2 , DIR_X , flux , tend ); + semi_discrete_step( state , state_tmp , state , dt / 1 , DIR_X , flux , tend ); } if (direction_switch) { direction_switch = 0; } else { direction_switch = 1; } } @@ -181,203 +208,189 @@ void perform_timestep( real3d const &state , real dt , int &direction_switch , F //Perform a single semi-discretized step in time with the form: //state_out = state_init + dt * rhs(state_forcing) //Meaning the step starts from state_init, computes the rhs using state_forcing, and stores the result in state_out -void semi_discrete_step( realConst3d state_init , real3d const &state_forcing , real3d const &state_out , real dt , int dir , Fixed_data const &fixed_data ) { - auto &nx = fixed_data.nx ; - auto &nz = fixed_data.nz ; - auto &i_beg = fixed_data.i_beg ; - auto &k_beg = fixed_data.k_beg ; - auto &hy_dens_cell = fixed_data.hy_dens_cell ; - - real3d tend("tend",NUM_VARS,nz,nx); - +void semi_discrete_step( double *state_init , double 
*state_forcing , double *state_out , double dt , int dir , double *flux , double *tend ) { + int i, k, ll, inds, indt, indw; + double x, z, wpert, dist, x0, z0, xrad, zrad, amp; if (dir == DIR_X) { //Set the halo values for this MPI task's fluid state in the x-direction - yakl::timer_start("halo x"); - set_halo_values_x(state_forcing,fixed_data); - yakl::timer_stop("halo x"); + set_halo_values_x(state_forcing); //Compute the time tendencies for the fluid state in the x-direction - yakl::timer_start("tendencies x"); - compute_tendencies_x(state_forcing,tend,dt,fixed_data); - yakl::timer_stop("tendencies x"); + compute_tendencies_x(state_forcing,flux,tend,dt); } else if (dir == DIR_Z) { //Set the halo values for this MPI task's fluid state in the z-direction - yakl::timer_start("halo z"); - set_halo_values_z(state_forcing,fixed_data); - yakl::timer_stop("halo z"); + set_halo_values_z(state_forcing); //Compute the time tendencies for the fluid state in the z-direction - yakl::timer_start("tendencies z"); - compute_tendencies_z(state_forcing,tend,dt,fixed_data); - yakl::timer_stop("tendencies z"); + compute_tendencies_z(state_forcing,flux,tend,dt); } ///////////////////////////////////////////////// - // TODO: MAKE THESE 3 LOOPS A PARALLEL_FOR + // TODO: THREAD ME ///////////////////////////////////////////////// //Apply the tendencies to the fluid state - yakl::timer_start("apply tendencies"); - for (int ll=0; ll stencil; - SArray d3_vals; - SArray vals; + for (k=0; k stencil; - SArray d3_vals; - SArray vals; + for (k=0; k qpoints; - SArray qweights; - - qpoints(0) = 0.112701665379258311482073460022; - qpoints(1) = 0.500000000000000000000000000000; - qpoints(2) = 0.887298334620741688517926539980; - - qweights(0) = 0.277777777777777777777777777779; - qweights(1) = 0.444444444444444444444444444444; - qweights(2) = 0.277777777777777777777777777779; - ////////////////////////////////////////////////////////////////////////// // Initialize the cell-averaged fluid state via Gauss-Legendre quadrature ////////////////////////////////////////////////////////////////////////// - ///////////////////////////////////////////////// - // TODO: MAKE THESE 2 LOOPS A PARALLEL_FOR - ///////////////////////////////////////////////// - for (int k=0; k , Oak Ridge National Laboratory +// This code simulates dry, stratified, compressible, non-hydrostatic fluid flows +// For documentation, please see the attached documentation in the "documentation" folder +// +////////////////////////////////////////////////////////////////////////////////////////// + +#include +#include +#include +#include +#include "const.h" +#include "pnetcdf.h" +#include + +// We're going to define all arrays on the host because this doesn't use parallel_for +typedef yakl::Array real1d; +typedef yakl::Array real2d; +typedef yakl::Array real3d; +typedef yakl::Array doub1d; +typedef yakl::Array doub2d; +typedef yakl::Array doub3d; + +typedef yakl::Array realConst1d; +typedef yakl::Array realConst2d; +typedef yakl::Array realConst3d; +typedef yakl::Array doubConst1d; +typedef yakl::Array doubConst2d; +typedef yakl::Array doubConst3d; + +/////////////////////////////////////////////////////////////////////////////////////// +// Variables that are initialized but remain static over the course of the simulation +/////////////////////////////////////////////////////////////////////////////////////// +struct Fixed_data { + int nx, nz; //Number of local grid cells in the x- and z- dimensions for this MPI task + int i_beg, k_beg; //beginning index in the 
x- and z-directions for this MPI task + int nranks, myrank; //Number of MPI ranks and my rank id + int left_rank, right_rank; //MPI Rank IDs that exist to my left and right in the global domain + int mainproc; //Am I the main process (rank == 0)? + realConst1d hy_dens_cell; //hydrostatic density (vert cell avgs). Dimensions: (1-hs:nz+hs) + realConst1d hy_dens_theta_cell; //hydrostatic rho*t (vert cell avgs). Dimensions: (1-hs:nz+hs) + realConst1d hy_dens_int; //hydrostatic density (vert cell interf). Dimensions: (1:nz+1) + realConst1d hy_dens_theta_int; //hydrostatic rho*t (vert cell interf). Dimensions: (1:nz+1) + realConst1d hy_pressure_int; //hydrostatic press (vert cell interf). Dimensions: (1:nz+1) +}; + +//Declaring the functions defined after "main" +void init ( real3d &state , real &dt , Fixed_data &fixed_data ); +void finalize ( ); +void injection ( real x , real z , real &r , real &u , real &w , real &t , real &hr , real &ht ); +void density_current ( real x , real z , real &r , real &u , real &w , real &t , real &hr , real &ht ); +void gravity_waves ( real x , real z , real &r , real &u , real &w , real &t , real &hr , real &ht ); +void thermal ( real x , real z , real &r , real &u , real &w , real &t , real &hr , real &ht ); +void collision ( real x , real z , real &r , real &u , real &w , real &t , real &hr , real &ht ); +void hydro_const_theta ( real z , real &r , real &t ); +void hydro_const_bvfreq ( real z , real bv_freq0 , real &r , real &t ); +real sample_ellipse_cosine( real x , real z , real amp , real x0 , real z0 , real xrad , real zrad ); +void output ( realConst3d state , real etime , int &num_out , Fixed_data const &fixed_data ); +void ncwrap ( int ierr , int line ); +void perform_timestep ( real3d const &state , real dt , int &direction_switch , Fixed_data const &fixed_data ); +void semi_discrete_step ( realConst3d state_init , real3d const &state_forcing , real3d const &state_out , real dt , int dir , Fixed_data const &fixed_data ); +void compute_tendencies_x ( realConst3d state , real3d const &tend , real dt , Fixed_data const &fixed_data ); +void compute_tendencies_z ( realConst3d state , real3d const &tend , real dt , Fixed_data const &fixed_data ); +void set_halo_values_x ( real3d const &state , Fixed_data const &fixed_data ); +void set_halo_values_z ( real3d const &state , Fixed_data const &fixed_data ); +void reductions ( realConst3d state , double &mass , double &te , Fixed_data const &fixed_data ); + + +/////////////////////////////////////////////////////////////////////////////////////// +// THE MAIN PROGRAM STARTS HERE +/////////////////////////////////////////////////////////////////////////////////////// +int main(int argc, char **argv) { + MPI_Init(&argc,&argv); + yakl::init(); + { + Fixed_data fixed_data; + real3d state; + real dt; //Model time step (seconds) + + // init allocates state + init( state , dt , fixed_data ); + + auto &mainproc = fixed_data.mainproc; + + //Initial reductions for mass, kinetic energy, and total energy + double mass0, te0; + reductions(state,mass0,te0,fixed_data); + + int num_out = 0; //The number of outputs performed so far + real output_counter = 0; //Helps determine when it's time to do output + real etime = 0; + + //Output the initial state + if (output_freq >= 0) { + output(state,etime,num_out,fixed_data); + } + + int direction_switch = 1; // Tells dimensionally split which order to take x,z solves + + //////////////////////////////////////////////////// + // MAIN TIME STEP LOOP + 
//////////////////////////////////////////////////// + auto t1 = std::chrono::steady_clock::now(); + while (etime < sim_time) { + //If the time step leads to exceeding the simulation time, shorten it for the last step + if (etime + dt > sim_time) { dt = sim_time - etime; } + //Perform a single time step + perform_timestep(state,dt,direction_switch,fixed_data); + //Inform the user + #ifndef NO_INFORM + if (mainproc) { printf( "Elapsed Time: %lf / %lf\n", etime , sim_time ); } + #endif + //Update the elapsed time and output counter + etime = etime + dt; + output_counter = output_counter + dt; + //If it's time for output, reset the counter, and do output + if (output_freq >= 0 && output_counter >= output_freq) { + output_counter = output_counter - output_freq; + output(state,etime,num_out,fixed_data); + } + } + auto t2 = std::chrono::steady_clock::now(); + if (mainproc) { + std::cout << "CPU Time: " << std::chrono::duration(t2-t1).count() << " sec\n"; + } + + //Final reductions for mass, kinetic energy, and total energy + double mass, te; + reductions(state,mass,te,fixed_data); + + if (mainproc) { + printf( "d_mass: %le\n" , (mass - mass0)/mass0 ); + printf( "d_te: %le\n" , (te - te0 )/te0 ); + } + + finalize(); + } + yakl::finalize(); + MPI_Finalize(); +} + + +//Performs a single dimensionally split time step using a simple low-storage three-stage Runge-Kutta time integrator +//The dimensional splitting is a second-order-accurate alternating Strang splitting in which the +//order of directions is alternated each time step. +//The Runge-Kutta method used here is defined as follows: +// q* = q_n + dt/3 * rhs(q_n) +// q** = q_n + dt/2 * rhs(q* ) +// q_n+1 = q_n + dt/1 * rhs(q**) +void perform_timestep( real3d const &state , real dt , int &direction_switch , Fixed_data const &fixed_data ) { + auto &nx = fixed_data.nx ; + auto &nz = fixed_data.nz ; + + real3d state_tmp("state_tmp",NUM_VARS,nz+2*hs,nx+2*hs); + + if (direction_switch) { + //x-direction first + semi_discrete_step( state , state , state_tmp , dt / 3 , DIR_X , fixed_data ); + semi_discrete_step( state , state_tmp , state_tmp , dt / 2 , DIR_X , fixed_data ); + semi_discrete_step( state , state_tmp , state , dt / 1 , DIR_X , fixed_data ); + //z-direction second + semi_discrete_step( state , state , state_tmp , dt / 3 , DIR_Z , fixed_data ); + semi_discrete_step( state , state_tmp , state_tmp , dt / 2 , DIR_Z , fixed_data ); + semi_discrete_step( state , state_tmp , state , dt / 1 , DIR_Z , fixed_data ); + } else { + //z-direction second + semi_discrete_step( state , state , state_tmp , dt / 3 , DIR_Z , fixed_data ); + semi_discrete_step( state , state_tmp , state_tmp , dt / 2 , DIR_Z , fixed_data ); + semi_discrete_step( state , state_tmp , state , dt / 1 , DIR_Z , fixed_data ); + //x-direction first + semi_discrete_step( state , state , state_tmp , dt / 3 , DIR_X , fixed_data ); + semi_discrete_step( state , state_tmp , state_tmp , dt / 2 , DIR_X , fixed_data ); + semi_discrete_step( state , state_tmp , state , dt / 1 , DIR_X , fixed_data ); + } + if (direction_switch) { direction_switch = 0; } else { direction_switch = 1; } +} + + +//Perform a single semi-discretized step in time with the form: +//state_out = state_init + dt * rhs(state_forcing) +//Meaning the step starts from state_init, computes the rhs using state_forcing, and stores the result in state_out +void semi_discrete_step( realConst3d state_init , real3d const &state_forcing , real3d const &state_out , real dt , int dir , Fixed_data const &fixed_data ) { + auto &nx = 
fixed_data.nx ; + auto &nz = fixed_data.nz ; + auto &i_beg = fixed_data.i_beg ; + auto &k_beg = fixed_data.k_beg ; + auto &hy_dens_cell = fixed_data.hy_dens_cell ; + + real3d tend("tend",NUM_VARS,nz,nx); + + if (dir == DIR_X) { + //Set the halo values for this MPI task's fluid state in the x-direction + yakl::timer_start("halo x"); + set_halo_values_x(state_forcing,fixed_data); + yakl::timer_stop("halo x"); + //Compute the time tendencies for the fluid state in the x-direction + yakl::timer_start("tendencies x"); + compute_tendencies_x(state_forcing,tend,dt,fixed_data); + yakl::timer_stop("tendencies x"); + } else if (dir == DIR_Z) { + //Set the halo values for this MPI task's fluid state in the z-direction + yakl::timer_start("halo z"); + set_halo_values_z(state_forcing,fixed_data); + yakl::timer_stop("halo z"); + //Compute the time tendencies for the fluid state in the z-direction + yakl::timer_start("tendencies z"); + compute_tendencies_z(state_forcing,tend,dt,fixed_data); + yakl::timer_stop("tendencies z"); + } + + ///////////////////////////////////////////////// + // TODO: MAKE THESE 3 LOOPS A PARALLEL_FOR + ///////////////////////////////////////////////// + //Apply the tendencies to the fluid state + yakl::timer_start("apply tendencies"); + for (int ll=0; ll stencil; + SArray d3_vals; + SArray vals; + //Use fourth-order interpolation from four cell averages to compute the value at the interface in question + for (int ll=0; ll stencil; + SArray d3_vals; + SArray vals; + //Use fourth-order interpolation from four cell averages to compute the value at the interface in question + for (int ll=0; ll qpoints; + SArray qweights; + + qpoints(0) = 0.112701665379258311482073460022; + qpoints(1) = 0.500000000000000000000000000000; + qpoints(2) = 0.887298334620741688517926539980; + + qweights(0) = 0.277777777777777777777777777779; + qweights(1) = 0.444444444444444444444444444444; + qweights(2) = 0.277777777777777777777777777779; + + ////////////////////////////////////////////////////////////////////////// + // Initialize the cell-averaged fluid state via Gauss-Legendre quadrature + ////////////////////////////////////////////////////////////////////////// + ///////////////////////////////////////////////// + // TODO: MAKE THESE 2 LOOPS A PARALLEL_FOR + ///////////////////////////////////////////////// + for (int k=0; k #include #include +#include #include "const.h" #include "pnetcdf.h" #include @@ -61,14 +62,14 @@ struct Fixed_data { //Declaring the functions defined after "main" void init ( real3d &state , real &dt , Fixed_data &fixed_data ); void finalize ( ); -YAKL_INLINE void injection ( real x , real z , real &r , real &u , real &w , real &t , real &hr , real &ht ); -YAKL_INLINE void density_current ( real x , real z , real &r , real &u , real &w , real &t , real &hr , real &ht ); -YAKL_INLINE void gravity_waves ( real x , real z , real &r , real &u , real &w , real &t , real &hr , real &ht ); -YAKL_INLINE void thermal ( real x , real z , real &r , real &u , real &w , real &t , real &hr , real &ht ); -YAKL_INLINE void collision ( real x , real z , real &r , real &u , real &w , real &t , real &hr , real &ht ); -YAKL_INLINE void hydro_const_theta ( real z , real &r , real &t ); -YAKL_INLINE void hydro_const_bvfreq ( real z , real bv_freq0 , real &r , real &t ); -YAKL_INLINE real sample_ellipse_cosine( real x , real z , real amp , real x0 , real z0 , real xrad , real zrad ); +void injection ( real x , real z , real &r , real &u , real &w , real &t , real &hr , real &ht ); +void 
density_current ( real x , real z , real &r , real &u , real &w , real &t , real &hr , real &ht ); +void gravity_waves ( real x , real z , real &r , real &u , real &w , real &t , real &hr , real &ht ); +void thermal ( real x , real z , real &r , real &u , real &w , real &t , real &hr , real &ht ); +void collision ( real x , real z , real &r , real &u , real &w , real &t , real &hr , real &ht ); +void hydro_const_theta ( real z , real &r , real &t ); +void hydro_const_bvfreq ( real z , real bv_freq0 , real &r , real &t ); +real sample_ellipse_cosine( real x , real z , real amp , real x0 , real z0 , real xrad , real zrad ); void output ( realConst3d state , real etime , int &num_out , Fixed_data const &fixed_data ); void ncwrap ( int ierr , int line ); void perform_timestep ( real3d const &state , real dt , int &direction_switch , Fixed_data const &fixed_data ); @@ -114,7 +115,7 @@ int main(int argc, char **argv) { //////////////////////////////////////////////////// // MAIN TIME STEP LOOP //////////////////////////////////////////////////// - yakl::fence(); + Kokkos::fence(); auto t1 = std::chrono::steady_clock::now(); while (etime < sim_time) { //If the time step leads to exceeding the simulation time, shorten it for the last step @@ -134,7 +135,7 @@ int main(int argc, char **argv) { output(state,etime,num_out,fixed_data); } } - yakl::fence(); + Kokkos::fence(); auto t2 = std::chrono::steady_clock::now(); if (mainproc) { std::cout << "CPU Time: " << std::chrono::duration(t2-t1).count() << " sec\n"; @@ -229,7 +230,7 @@ void semi_discrete_step( realConst3d state_init , real3d const &state_forcing , // for (k=0; k(NUM_VARS,nz,nx) , YAKL_LAMBDA ( int ll, int k, int i ) { + parallel_for( SimpleBounds<3>(NUM_VARS,nz,nx) , KOKKOS_LAMBDA ( int ll, int k, int i ) { if (data_spec_int == DATA_SPEC_GRAVITY_WAVES) { real x = (i_beg + i+0.5)*dx; real z = (k_beg + k+0.5)*dz; @@ -259,7 +260,7 @@ void compute_tendencies_x( realConst3d state , real3d const &tend , real dt , Fi //Compute fluxes in the x-direction for each cell // for (k=0; k(nz,nx+1) , YAKL_LAMBDA (int k, int i ) { + parallel_for( SimpleBounds<2>(nz,nx+1) , KOKKOS_LAMBDA (int k, int i ) { SArray stencil; SArray d3_vals; SArray vals; @@ -293,7 +294,7 @@ void compute_tendencies_x( realConst3d state , real3d const &tend , real dt , Fi // for (ll=0; ll(NUM_VARS,nz,nx) , YAKL_LAMBDA ( int ll, int k, int i ) { + parallel_for( SimpleBounds<3>(NUM_VARS,nz,nx) , KOKKOS_LAMBDA ( int ll, int k, int i ) { tend(ll,k,i) = -( flux(ll,k,i+1) - flux(ll,k,i) ) / dx; }); } @@ -317,7 +318,7 @@ void compute_tendencies_z( realConst3d state , real3d const &tend , real dt , Fi //Compute fluxes in the x-direction for each cell // for (k=0; k(nz+1,nx) , YAKL_LAMBDA (int k, int i) { + parallel_for( SimpleBounds<2>(nz+1,nx) , KOKKOS_LAMBDA (int k, int i) { SArray stencil; SArray d3_vals; SArray vals; @@ -355,7 +356,7 @@ void compute_tendencies_z( realConst3d state , real3d const &tend , real dt , Fi // for (ll=0; ll(NUM_VARS,nz,nx) , YAKL_LAMBDA ( int ll, int k, int i ) { + parallel_for( SimpleBounds<3>(NUM_VARS,nz,nx) , KOKKOS_LAMBDA ( int ll, int k, int i ) { tend(ll,k,i) = -( flux(ll,k+1,i) - flux(ll,k,i) ) / dz; if (ll == ID_WMOM) { tend(ll,k,i) -= state(ID_DENS,hs+k,hs+i)*grav; @@ -381,7 +382,7 @@ void set_halo_values_x( real3d const &state , Fixed_data const &fixed_data ) { if (fixed_data.nranks == 1) { - parallel_for( SimpleBounds<2>(NUM_VARS,nz) , YAKL_LAMBDA (int ll, int k) { + parallel_for( SimpleBounds<2>(NUM_VARS,nz) , KOKKOS_LAMBDA (int ll, int k) { 
state(ll,hs+k,0 ) = state(ll,hs+k,nx+hs-2); state(ll,hs+k,1 ) = state(ll,hs+k,nx+hs-1); state(ll,hs+k,nx+hs ) = state(ll,hs+k,hs ); @@ -403,7 +404,7 @@ void set_halo_values_x( real3d const &state , Fixed_data const &fixed_data ) { //Prepost receives #ifdef GPU_AWARE_MPI - yakl::fence(); + Kokkos::fence(); ierr = MPI_Irecv(recvbuf_l.data(),hs*nz*NUM_VARS,mpi_type, left_rank,0,MPI_COMM_WORLD,&req_r[0]); ierr = MPI_Irecv(recvbuf_r.data(),hs*nz*NUM_VARS,mpi_type,right_rank,1,MPI_COMM_WORLD,&req_r[1]); #else @@ -415,17 +416,17 @@ void set_halo_values_x( real3d const &state , Fixed_data const &fixed_data ) { // for (ll=0; ll(NUM_VARS,nz,hs) , YAKL_LAMBDA (int ll, int k, int s) { + parallel_for( SimpleBounds<3>(NUM_VARS,nz,hs) , KOKKOS_LAMBDA (int ll, int k, int s) { sendbuf_l(ll,k,s) = state(ll,k+hs,hs+s); sendbuf_r(ll,k,s) = state(ll,k+hs,nx+s); }); - yakl::fence(); + Kokkos::fence(); #ifndef GPU_AWARE_MPI // This will copy from GPU to host sendbuf_l.deep_copy_to(sendbuf_l_cpu); sendbuf_r.deep_copy_to(sendbuf_r_cpu); - yakl::fence(); + Kokkos::fence(); #endif //Fire off the sends @@ -444,18 +445,18 @@ void set_halo_values_x( real3d const &state , Fixed_data const &fixed_data ) { // This will copy from host to GPU recvbuf_l_cpu.deep_copy_to(recvbuf_l); recvbuf_r_cpu.deep_copy_to(recvbuf_r); - yakl::fence(); + Kokkos::fence(); #endif //Unpack the receive buffers // for (ll=0; ll(NUM_VARS,nz,hs) , YAKL_LAMBDA (int ll, int k, int s) { + parallel_for( SimpleBounds<3>(NUM_VARS,nz,hs) , KOKKOS_LAMBDA (int ll, int k, int s) { state(ll,k+hs,s ) = recvbuf_l(ll,k,s); state(ll,k+hs,nx+hs+s) = recvbuf_r(ll,k,s); }); - yakl::fence(); + Kokkos::fence(); //Wait for sends to finish ierr = MPI_Waitall(2,req_s,MPI_STATUSES_IGNORE); @@ -466,7 +467,7 @@ void set_halo_values_x( real3d const &state , Fixed_data const &fixed_data ) { if (myrank == 0) { // for (k=0; k(nz,hs) , YAKL_LAMBDA (int k, int i) { + parallel_for( SimpleBounds<2>(nz,hs) , KOKKOS_LAMBDA (int k, int i) { double z = (k_beg + k+0.5)*dz; if (abs(z-3*zlen/4) <= zlen/16) { state(ID_UMOM,hs+k,i) = (state(ID_DENS,hs+k,i)+hy_dens_cell(hs+k)) * 50; @@ -487,7 +488,7 @@ void set_halo_values_z( real3d const &state , Fixed_data const &fixed_data ) { // for (ll=0; ll(NUM_VARS,nx+2*hs) , YAKL_LAMBDA (int ll, int i) { + parallel_for( SimpleBounds<2>(NUM_VARS,nx+2*hs) , KOKKOS_LAMBDA (int ll, int i) { if (ll == ID_WMOM) { state(ll,0 ,i) = 0.; state(ll,1 ,i) = 0.; @@ -569,7 +570,7 @@ void init( real3d &state , real &dt , Fixed_data &fixed_data ) { ////////////////////////////////////////////////////////////////////////// // for (k=0; k(nz+2*hs,nx+2*hs) , YAKL_LAMBDA (int k, int i) { + parallel_for( SimpleBounds<2>(nz+2*hs,nx+2*hs) , KOKKOS_LAMBDA (int k, int i) { //Initialize the state to zero for (int ll=0; ll(nz,nx) , YAKL_LAMBDA (int k, int i) { + parallel_for( SimpleBounds<2>(nz,nx) , KOKKOS_LAMBDA (int k, int i) { dens (k,i) = state(ID_DENS,hs+k,hs+i); uwnd (k,i) = state(ID_UMOM,hs+k,hs+i) / ( hy_dens_cell(hs+k) + state(ID_DENS,hs+k,hs+i) ); wwnd (k,i) = state(ID_WMOM,hs+k,hs+i) / ( hy_dens_cell(hs+k) + state(ID_DENS,hs+k,hs+i) ); theta(k,i) = ( state(ID_RHOT,hs+k,hs+i) + hy_dens_theta_cell(hs+k) ) / ( hy_dens_cell(hs+k) + state(ID_DENS,hs+k,hs+i) ) - hy_dens_theta_cell(hs+k) / hy_dens_cell(hs+k); }); - yakl::fence(); + Kokkos::fence(); //Write the grid data to file with all the processes writing collectively st3[0] = num_out; st3[1] = k_beg; st3[2] = i_beg; @@ -875,7 +876,7 @@ void reductions( realConst3d state, double &mass , double &te , Fixed_data const // 
for (k=0; k(nz,nx) , YAKL_LAMBDA (int k, int i) { + parallel_for( SimpleBounds<2>(nz,nx) , KOKKOS_LAMBDA (int k, int i) { double r = state(ID_DENS,hs+k,hs+i) + hy_dens_cell(hs+k); // Density double u = state(ID_UMOM,hs+k,hs+i) / r; // U-wind double w = state(ID_WMOM,hs+k,hs+i) / r; // W-wind diff --git a/cpp_yakl/miniWeather_serial.cpp b/cpp_yakl/miniWeather_serial.cpp new file mode 100644 index 00000000..2107c304 --- /dev/null +++ b/cpp_yakl/miniWeather_serial.cpp @@ -0,0 +1,850 @@ + +////////////////////////////////////////////////////////////////////////////////////////// +// miniWeather +// Author: Matt Norman , Oak Ridge National Laboratory +// This code simulates dry, stratified, compressible, non-hydrostatic fluid flows +// For documentation, please see the attached documentation in the "documentation" folder +// +////////////////////////////////////////////////////////////////////////////////////////// + +#include +#include +#include +#include +#include "const.h" +#include "pnetcdf.h" +#include + +// We're going to define all arrays on the host because this doesn't use parallel_for +typedef yakl::Array real1d; +typedef yakl::Array real2d; +typedef yakl::Array real3d; +typedef yakl::Array doub1d; +typedef yakl::Array doub2d; +typedef yakl::Array doub3d; + +typedef yakl::Array realConst1d; +typedef yakl::Array realConst2d; +typedef yakl::Array realConst3d; +typedef yakl::Array doubConst1d; +typedef yakl::Array doubConst2d; +typedef yakl::Array doubConst3d; + +/////////////////////////////////////////////////////////////////////////////////////// +// Variables that are initialized but remain static over the course of the simulation +/////////////////////////////////////////////////////////////////////////////////////// +struct Fixed_data { + int nx, nz; //Number of local grid cells in the x- and z- dimensions for this MPI task + int i_beg, k_beg; //beginning index in the x- and z-directions for this MPI task + int nranks, myrank; //Number of MPI ranks and my rank id + int left_rank, right_rank; //MPI Rank IDs that exist to my left and right in the global domain + int mainproc; //Am I the main process (rank == 0)? + realConst1d hy_dens_cell; //hydrostatic density (vert cell avgs). Dimensions: (1-hs:nz+hs) + realConst1d hy_dens_theta_cell; //hydrostatic rho*t (vert cell avgs). Dimensions: (1-hs:nz+hs) + realConst1d hy_dens_int; //hydrostatic density (vert cell interf). Dimensions: (1:nz+1) + realConst1d hy_dens_theta_int; //hydrostatic rho*t (vert cell interf). Dimensions: (1:nz+1) + realConst1d hy_pressure_int; //hydrostatic press (vert cell interf). 
Dimensions: (1:nz+1) +}; + +//Declaring the functions defined after "main" +void init ( real3d &state , real &dt , Fixed_data &fixed_data ); +void finalize ( ); +void injection ( real x , real z , real &r , real &u , real &w , real &t , real &hr , real &ht ); +void density_current ( real x , real z , real &r , real &u , real &w , real &t , real &hr , real &ht ); +void gravity_waves ( real x , real z , real &r , real &u , real &w , real &t , real &hr , real &ht ); +void thermal ( real x , real z , real &r , real &u , real &w , real &t , real &hr , real &ht ); +void collision ( real x , real z , real &r , real &u , real &w , real &t , real &hr , real &ht ); +void hydro_const_theta ( real z , real &r , real &t ); +void hydro_const_bvfreq ( real z , real bv_freq0 , real &r , real &t ); +real sample_ellipse_cosine( real x , real z , real amp , real x0 , real z0 , real xrad , real zrad ); +void output ( realConst3d state , real etime , int &num_out , Fixed_data const &fixed_data ); +void ncwrap ( int ierr , int line ); +void perform_timestep ( real3d const &state , real dt , int &direction_switch , Fixed_data const &fixed_data ); +void semi_discrete_step ( realConst3d state_init , real3d const &state_forcing , real3d const &state_out , real dt , int dir , Fixed_data const &fixed_data ); +void compute_tendencies_x ( realConst3d state , real3d const &tend , real dt , Fixed_data const &fixed_data ); +void compute_tendencies_z ( realConst3d state , real3d const &tend , real dt , Fixed_data const &fixed_data ); +void set_halo_values_x ( real3d const &state , Fixed_data const &fixed_data ); +void set_halo_values_z ( real3d const &state , Fixed_data const &fixed_data ); +void reductions ( realConst3d state , double &mass , double &te , Fixed_data const &fixed_data ); + + +/////////////////////////////////////////////////////////////////////////////////////// +// THE MAIN PROGRAM STARTS HERE +/////////////////////////////////////////////////////////////////////////////////////// +int main(int argc, char **argv) { + MPI_Init(&argc,&argv); + yakl::init(); + { + Fixed_data fixed_data; + real3d state; + real dt; //Model time step (seconds) + + // init allocates state + init( state , dt , fixed_data ); + + auto &mainproc = fixed_data.mainproc; + + //Initial reductions for mass, kinetic energy, and total energy + double mass0, te0; + reductions(state,mass0,te0,fixed_data); + + int num_out = 0; //The number of outputs performed so far + real output_counter = 0; //Helps determine when it's time to do output + real etime = 0; + + //Output the initial state + if (output_freq >= 0) { + output(state,etime,num_out,fixed_data); + } + + int direction_switch = 1; // Tells dimensionally split which order to take x,z solves + + //////////////////////////////////////////////////// + // MAIN TIME STEP LOOP + //////////////////////////////////////////////////// + auto t1 = std::chrono::steady_clock::now(); + while (etime < sim_time) { + //If the time step leads to exceeding the simulation time, shorten it for the last step + if (etime + dt > sim_time) { dt = sim_time - etime; } + //Perform a single time step + perform_timestep(state,dt,direction_switch,fixed_data); + //Inform the user + #ifndef NO_INFORM + if (mainproc) { printf( "Elapsed Time: %lf / %lf\n", etime , sim_time ); } + #endif + //Update the elapsed time and output counter + etime = etime + dt; + output_counter = output_counter + dt; + //If it's time for output, reset the counter, and do output + if (output_freq >= 0 && output_counter >= output_freq) { + 
output_counter = output_counter - output_freq; + output(state,etime,num_out,fixed_data); + } + } + auto t2 = std::chrono::steady_clock::now(); + if (mainproc) { + std::cout << "CPU Time: " << std::chrono::duration(t2-t1).count() << " sec\n"; + } + + //Final reductions for mass, kinetic energy, and total energy + double mass, te; + reductions(state,mass,te,fixed_data); + + if (mainproc) { + printf( "d_mass: %le\n" , (mass - mass0)/mass0 ); + printf( "d_te: %le\n" , (te - te0 )/te0 ); + } + + finalize(); + } + yakl::finalize(); + MPI_Finalize(); +} + + +//Performs a single dimensionally split time step using a simple low-storage three-stage Runge-Kutta time integrator +//The dimensional splitting is a second-order-accurate alternating Strang splitting in which the +//order of directions is alternated each time step. +//The Runge-Kutta method used here is defined as follows: +// q* = q_n + dt/3 * rhs(q_n) +// q** = q_n + dt/2 * rhs(q* ) +// q_n+1 = q_n + dt/1 * rhs(q**) +void perform_timestep( real3d const &state , real dt , int &direction_switch , Fixed_data const &fixed_data ) { + auto &nx = fixed_data.nx ; + auto &nz = fixed_data.nz ; + + real3d state_tmp("state_tmp",NUM_VARS,nz+2*hs,nx+2*hs); + + if (direction_switch) { + //x-direction first + semi_discrete_step( state , state , state_tmp , dt / 3 , DIR_X , fixed_data ); + semi_discrete_step( state , state_tmp , state_tmp , dt / 2 , DIR_X , fixed_data ); + semi_discrete_step( state , state_tmp , state , dt / 1 , DIR_X , fixed_data ); + //z-direction second + semi_discrete_step( state , state , state_tmp , dt / 3 , DIR_Z , fixed_data ); + semi_discrete_step( state , state_tmp , state_tmp , dt / 2 , DIR_Z , fixed_data ); + semi_discrete_step( state , state_tmp , state , dt / 1 , DIR_Z , fixed_data ); + } else { + //z-direction second + semi_discrete_step( state , state , state_tmp , dt / 3 , DIR_Z , fixed_data ); + semi_discrete_step( state , state_tmp , state_tmp , dt / 2 , DIR_Z , fixed_data ); + semi_discrete_step( state , state_tmp , state , dt / 1 , DIR_Z , fixed_data ); + //x-direction first + semi_discrete_step( state , state , state_tmp , dt / 3 , DIR_X , fixed_data ); + semi_discrete_step( state , state_tmp , state_tmp , dt / 2 , DIR_X , fixed_data ); + semi_discrete_step( state , state_tmp , state , dt / 1 , DIR_X , fixed_data ); + } + if (direction_switch) { direction_switch = 0; } else { direction_switch = 1; } +} + + +//Perform a single semi-discretized step in time with the form: +//state_out = state_init + dt * rhs(state_forcing) +//Meaning the step starts from state_init, computes the rhs using state_forcing, and stores the result in state_out +void semi_discrete_step( realConst3d state_init , real3d const &state_forcing , real3d const &state_out , real dt , int dir , Fixed_data const &fixed_data ) { + auto &nx = fixed_data.nx ; + auto &nz = fixed_data.nz ; + auto &i_beg = fixed_data.i_beg ; + auto &k_beg = fixed_data.k_beg ; + auto &hy_dens_cell = fixed_data.hy_dens_cell ; + + real3d tend("tend",NUM_VARS,nz,nx); + + if (dir == DIR_X) { + //Set the halo values for this MPI task's fluid state in the x-direction + yakl::timer_start("halo x"); + set_halo_values_x(state_forcing,fixed_data); + yakl::timer_stop("halo x"); + //Compute the time tendencies for the fluid state in the x-direction + yakl::timer_start("tendencies x"); + compute_tendencies_x(state_forcing,tend,dt,fixed_data); + yakl::timer_stop("tendencies x"); + } else if (dir == DIR_Z) { + //Set the halo values for this MPI task's fluid state in the z-direction + 
yakl::timer_start("halo z"); + set_halo_values_z(state_forcing,fixed_data); + yakl::timer_stop("halo z"); + //Compute the time tendencies for the fluid state in the z-direction + yakl::timer_start("tendencies z"); + compute_tendencies_z(state_forcing,tend,dt,fixed_data); + yakl::timer_stop("tendencies z"); + } + + ///////////////////////////////////////////////// + // TODO: MAKE THESE 3 LOOPS A PARALLEL_FOR + ///////////////////////////////////////////////// + //Apply the tendencies to the fluid state + yakl::timer_start("apply tendencies"); + for (int ll=0; ll stencil; + SArray d3_vals; + SArray vals; + //Use fourth-order interpolation from four cell averages to compute the value at the interface in question + for (int ll=0; ll stencil; + SArray d3_vals; + SArray vals; + //Use fourth-order interpolation from four cell averages to compute the value at the interface in question + for (int ll=0; ll qpoints; + SArray qweights; + + qpoints(0) = 0.112701665379258311482073460022; + qpoints(1) = 0.500000000000000000000000000000; + qpoints(2) = 0.887298334620741688517926539980; + + qweights(0) = 0.277777777777777777777777777779; + qweights(1) = 0.444444444444444444444444444444; + qweights(2) = 0.277777777777777777777777777779; + + ////////////////////////////////////////////////////////////////////////// + // Initialize the cell-averaged fluid state via Gauss-Legendre quadrature + ////////////////////////////////////////////////////////////////////////// + ///////////////////////////////////////////////// + // TODO: MAKE THESE 2 LOOPS A PARALLEL_FOR + ///////////////////////////////////////////////// + for (int k=0; k Date: Thu, 19 Dec 2024 12:50:06 -0700 Subject: [PATCH 2/3] Update apply-clang-format --- common/apply-clang-format | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/common/apply-clang-format b/common/apply-clang-format index 547e2e81..9bbbf440 100755 --- a/common/apply-clang-format +++ b/common/apply-clang-format @@ -13,7 +13,7 @@ CLANG_FORMAT_VERSION="$(${CLANG_FORMAT_EXECUTABLE} --version)" CLANG_FORMAT_MAJOR_VERSION=$(echo "${CLANG_FORMAT_VERSION}" | sed 's/^[^0-9]*\([0-9]*\).*$/\1/g') CLANG_FORMAT_MINOR_VERSION=$(echo "${CLANG_FORMAT_VERSION}" | sed 's/^[^0-9]*[0-9]*\.\([0-9]*\).*$/\1/g') -if [ "${CLANG_FORMAT_MAJOR_VERSION}" -ne 16 ] || [ "${CLANG_FORMAT_MINOR_VERSION}" -ne 0 ]; then +if [ "${CLANG_FORMAT_MAJOR_VERSION}" -le 16 ]; then echo "*** This indent script requires clang-format version 16.0," echo "*** but version ${CLANG_FORMAT_MAJOR_VERSION}.${CLANG_FORMAT_MINOR_VERSION} was found instead." exit 1 @@ -21,8 +21,8 @@ fi BASE_DIR="$(git rev-parse --show-toplevel)" cd $BASE_DIR -if [ ! -f "scripts/apply-clang-format" ]; then - echo "*** The indenting script must be executed from within the Kokkos clone!" +if [ ! -f "common/apply-clang-format" ]; then + echo "*** The indenting script must be executed from within the project clone!" 
exit 1 fi From 79db14b40598c441a2e41825dee6191d3868e83c Mon Sep 17 00:00:00 2001 From: "Jan Ciesko (-EXP)" Date: Thu, 19 Dec 2024 12:50:22 -0700 Subject: [PATCH 3/3] Apply clang-format --- README.md | 34 +- cpp/CMakeLists.txt | 6 +- cpp/cmake/utils.cmake | 4 +- cpp/miniWeather_mpi.cpp | 1425 +++++++++------- cpp/miniWeather_mpi_openacc.cpp | 1485 +++++++++------- cpp/miniWeather_mpi_openmp.cpp | 1441 +++++++++------- cpp/miniWeather_mpi_openmp45.cpp | 1511 ++++++++++------- cpp/miniWeather_serial.cpp | 1300 ++++++++------ cpp_yakl/CMakeLists.txt | 12 +- cpp_yakl/cmake/modules/FindYAKL.cmake | 2 +- cpp_yakl/cmake/utils.cmake | 4 +- cpp_yakl/const.h | 109 +- .../miniWeather_mpi_parallelfor_simd_x.cpp | 1480 +++++++++------- cpp_yakl/miniWeather_mpi.cpp | 1246 ++++++++------ cpp_yakl/miniWeather_mpi_parallelfor.cpp | 1483 +++++++++------- cpp_yakl/miniWeather_serial.cpp | 1161 +++++++------ fortran/CMakeLists.txt | 16 +- fortran/utils.cmake | 6 +- petascale_institute.md | 6 +- 19 files changed, 7237 insertions(+), 5494 deletions(-) diff --git a/README.md b/README.md index 0d2d30c7..1672954f 100644 --- a/README.md +++ b/README.md @@ -59,7 +59,7 @@ Contributors: The miniWeather code mimics the basic dynamics seen in atmospheric weather and climate. The dynamics themselves are dry compressible, stratified, non-hydrostatic flows dominated by buoyant forces that are relatively small perturbations on a hydrostatic background state. The equations in this code themselves form the backbone of pretty much all fluid dynamics codes, and this particular flavor forms the base of all weather and climate modeling. -With about 500 total lines of code (and only about 200 lines that you care about), it serves as an approachable place to learn parallelization and porting using MPI + X, where X is OpenMP, OpenACC, CUDA, or potentially other approaches to CPU and accelerated parallelization. The code uses periodic boundary conditions in the x-direction and solid wall boundary conditions in the z-direction. +With about 500 total lines of code (and only about 200 lines that you care about), it serves as an approachable place to learn parallelization and porting using MPI + X, where X is OpenMP, OpenACC, CUDA, or potentially other approaches to CPU and accelerated parallelization. The code uses periodic boundary conditions in the x-direction and solid wall boundary conditions in the z-direction. ## Brief Description of the Code @@ -181,7 +181,7 @@ After the `cmake` configure, type `make -j` to build the code, and type `make te ### C++ -For the C++ code, there are three configurations: serial, mpi, and mpi+`parallel_for`. The latter uses a C++ kernel-launching approach, which is essentially CUDA with greater portability for multiple backends. This code also uses `cmake`, and you can use the summit scripts as examples. +For the C++ code, there are three configurations: serial, mpi, and mpi+`parallel_for`. The latter uses a C++ kernel-launching approach, which is essentially CUDA with greater portability for multiple backends. This code also uses `cmake`, and you can use the summit scripts as examples. ## Altering the Code's Configurations @@ -193,7 +193,7 @@ To alter the configuration of the code, you can control the number of cells in t * `-DOUT_FREQ=10`: Outputs every 10 seconds model time * `-DDATA_SPEC=DATA_SPEC_THERMAL`: Initializes a rising thermal -It's best if you keep `NX` exactly twice the value of `NZ` since the domain is 20km x 10km. 
+It's best if you keep `NX` exactly twice the value of `NZ` since the domain is 20km x 10km. The data specifications are `DATA_SPEC_COLLISION`, `DATA_SPEC_THERMAL`, `DATA_SPEC_MOUNTAIN`, `DATA_SPEC_DENSITY_CURRENT`, and `DATA_SPEC_INJECTION`, and each are described later on. @@ -211,13 +211,13 @@ Since parameters are set in the code itself, you don't need to pass any paramete ## Viewing the Output -The file I/O is done in the netCDF format: (https://www.unidata.ucar.edu/software/netcdf). To me, the easiest way to view the data is to use a tool called “ncview” (http://meteora.ucsd.edu/~pierce/ncview_home_page.html). To use it, you can simply type `ncview output.nc`, making sure you have X-forwarding enabled in your ssh session. Further, you can call `ncview -frames output.nc`, and it will dump out all of your frames in the native resolution you're viewing the data in, and you you can render a movie with tools like `ffmpeg`. +The file I/O is done in the netCDF format: (https://www.unidata.ucar.edu/software/netcdf). To me, the easiest way to view the data is to use a tool called “ncview” (http://meteora.ucsd.edu/~pierce/ncview_home_page.html). To use it, you can simply type `ncview output.nc`, making sure you have X-forwarding enabled in your ssh session. Further, you can call `ncview -frames output.nc`, and it will dump out all of your frames in the native resolution you're viewing the data in, and you you can render a movie with tools like `ffmpeg`. # Parallelization This code was designed to parallelize with MPI first and then OpenMP, OpenACC, OpenMP offload, or `parallel_for` next, but you can always parallelize with OpenMP or OpenACC without MPI if you want. But it is rewarding to be able to run it on multiple nodes at higher resolution for more and sharper eddies in the dynamics. -As you port the code, you'll want to change relatively little code at a time, re-compile, re-run, and look at the output to see that you're still getting the right answer. There are advantages to using a visual tool to check the answer (e.g., `ncview`), as it can sometimes give you clues as to why you're not getting the right answer. +As you port the code, you'll want to change relatively little code at a time, re-compile, re-run, and look at the output to see that you're still getting the right answer. There are advantages to using a visual tool to check the answer (e.g., `ncview`), as it can sometimes give you clues as to why you're not getting the right answer. Note that you only need to make changes code within the first 450 source lines for C and Fortran, and each loop that needs threading is decorated with a `// THREAD ME` comment. Everything below that is initialization and I/O code that doesn't need to be parallelized (unless you want to) for C and Fortran directives-based approaches. @@ -257,7 +257,7 @@ The second place is in the routine that sets the halo values in the x-direction. 4. Receive the data from your left and right MPI neighbors -5. Unpack the data from your left and right neighbors and place the data into your MPI rank's halo cells. +5. Unpack the data from your left and right neighbors and place the data into your MPI rank's halo cells. Once you complete this, the code will be fully parallelized in MPI. Both of the places you need to add code for MPI are marked in the serial code, and there are some extra hints in the `set_halo_values_x()` routine as well. 
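As a concrete illustration of the halo-exchange steps listed above, here is a minimal sketch in plain MPI + C++. It is not the repository's `set_halo_values_x()` routine: the function name `exchange_halo_x`, the flat indexing, the message tags, and the use of `MPI_DOUBLE` are illustrative assumptions, while `state`, the send/receive buffers, `left_rank`/`right_rank`, and the `hs`/`NUM_VARS` constants follow the names used in the code.

```C++
// Minimal sketch of the x-direction halo exchange (illustrative only, not the
// repository's exact set_halo_values_x). hs and NUM_VARS mirror the file-scope
// constants defined in the source; the buffers are assumed to be preallocated
// with hs*nz*NUM_VARS doubles each.
#include <mpi.h>

constexpr int hs       = 2;  // halo width, as in the code above
constexpr int NUM_VARS = 4;  // number of fluid state variables, as in the code above

void exchange_halo_x(double *state, double *sendbuf_l, double *sendbuf_r,
                     double *recvbuf_l, double *recvbuf_r,
                     int nx, int nz, int left_rank, int right_rank) {
  MPI_Request req_r[2], req_s[2];
  const int n = hs * nz * NUM_VARS;

  // 1. Prepost the receives from the left and right neighbors
  MPI_Irecv(recvbuf_l, n, MPI_DOUBLE, left_rank , 0, MPI_COMM_WORLD, &req_r[0]);
  MPI_Irecv(recvbuf_r, n, MPI_DOUBLE, right_rank, 1, MPI_COMM_WORLD, &req_r[1]);

  // 2. Pack the westmost and eastmost interior columns into the send buffers
  for (int ll = 0; ll < NUM_VARS; ll++) {
    for (int k = 0; k < nz; k++) {
      for (int s = 0; s < hs; s++) {
        const int ib   = ll*nz*hs + k*hs + s;                        // buffer index
        const int base = ll*(nz+2*hs)*(nx+2*hs) + (k+hs)*(nx+2*hs);  // row offset in state
        sendbuf_l[ib] = state[base + hs + s];   // first interior columns
        sendbuf_r[ib] = state[base + nx + s];   // last  interior columns
      }
    }
  }

  // 3. Fire off the sends
  MPI_Isend(sendbuf_l, n, MPI_DOUBLE, left_rank , 1, MPI_COMM_WORLD, &req_s[0]);
  MPI_Isend(sendbuf_r, n, MPI_DOUBLE, right_rank, 0, MPI_COMM_WORLD, &req_s[1]);

  // 4. Wait for the receives, then unpack them into this rank's halo columns
  MPI_Waitall(2, req_r, MPI_STATUSES_IGNORE);
  for (int ll = 0; ll < NUM_VARS; ll++) {
    for (int k = 0; k < nz; k++) {
      for (int s = 0; s < hs; s++) {
        const int ib   = ll*nz*hs + k*hs + s;
        const int base = ll*(nz+2*hs)*(nx+2*hs) + (k+hs)*(nx+2*hs);
        state[base + s          ] = recvbuf_l[ib];   // left halo
        state[base + nx + hs + s] = recvbuf_r[ib];   // right halo
      }
    }
  }

  // 5. Make sure the sends have completed before the buffers are reused
  MPI_Waitall(2, req_s, MPI_STATUSES_IGNORE);
}
```

Preposting the receives before packing and sending avoids unexpected-message buffering, and waiting on the send requests last lets packing, communication, and unpacking overlap as much as the MPI library allows.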
@@ -343,7 +343,7 @@ In the C code, you'll need to put in manual `copy()`, `copyin()`, and `copyout() #pragma acc data copy( varname[ starting_index : size_of_transfer ] ) ``` -So, for instance, if you send a variable, `var`, of size `n` to the GPU, you will say, `#pragma acc data copyin(var[0:n])`. Many would expect it to look like an array slice (e.g., `(0:n-1)`), but it is not. +So, for instance, if you send a variable, `var`, of size `n` to the GPU, you will say, `#pragma acc data copyin(var[0:n])`. Many would expect it to look like an array slice (e.g., `(0:n-1)`), but it is not. Other than this, the approach is the same as with the Fortran case. @@ -425,7 +425,7 @@ inline void applyTendencies(realArr &state2, real const c0, realArr const &state state2(l,hs+k,hs+j,hs+i) = c0 * state0(l,hs+k,hs+j,hs+i) + c1 * state1(l,hs+k,hs+j,hs+i) + ct * dom.dt * tend(l,k,j,i); - }); + }); } ``` @@ -463,9 +463,9 @@ realArrHost recvbuf_l_cpu; realArrHost recvbuf_r_cpu; ``` -You'll also need to replace the buffers in `MPI_Isend()` and `MPI_Irecv()` with the CPU versions. +You'll also need to replace the buffers in `MPI_Isend()` and `MPI_Irecv()` with the CPU versions. -Next, you need to allocate these in `init()` in a similar manner as the existing MPI buffers, but replacing `realArr` with `realArrHost`. +Next, you need to allocate these in `init()` in a similar manner as the existing MPI buffers, but replacing `realArr` with `realArrHost`. Finally, you'll need to manage data movement to and from the CPU in the File I/O and in the MPI message exchanges. @@ -481,7 +481,7 @@ For the MPI buffers, you'll need to use the `Array::deep_copy_to(Array &target)` sendbuf_l.deep_copy_to(sendbuf_l_cpu); ``` -A deep copy from a device Array to a host Array will invoke `cudaMemcopy(...,cudaMemcpyDeviceToHost)`, and a deep copy from a host Array to a device Array will invoke `cudaMemcpy(...,cudaMemcpyHostToDevice)` under the hood. You will need to copy the send buffers from device to host just before calling `MPI_Isend()`, and you will need to copy the recv buffers from host to device just after `MPI_WaitAll()` on the receive requests, `req_r`. +A deep copy from a device Array to a host Array will invoke `cudaMemcopy(...,cudaMemcpyDeviceToHost)`, and a deep copy from a host Array to a device Array will invoke `cudaMemcpy(...,cudaMemcpyHostToDevice)` under the hood. You will need to copy the send buffers from device to host just before calling `MPI_Isend()`, and you will need to copy the recv buffers from host to device just after `MPI_WaitAll()` on the receive requests, `req_r`. ### Why Doesn't MiniWeather Use CUDA? @@ -491,13 +491,13 @@ Because if you've refactored your code to use kernel launching (i.e., CUDA), you I chose not to use the mainline C++ portability frameworks for two main reasons. -1. It's easier to compile and managed things with a C++ performance portability layer that's < 3K lines of code long, hence: [YAKL (Yet Another Kernel Launcher)](github.com/mrnorman/YAKL). +1. It's easier to compile and managed things with a C++ performance portability layer that's < 3K lines of code long, hence: [YAKL (Yet Another Kernel Launcher)](github.com/mrnorman/YAKL). 2. Kokkos in particular would not play nicely with the rest of the code in the CMake project. Likely if a Kokkos version is added, it will need to be a completely separate project and directory. 3. With `YAKL.h` and `Array.h`, you can see for yourself what's going on when we launch kernels using `parallel_for` on different hardware backends. 
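To make the deep-copy choreography described above concrete, here is a minimal sketch of the message exchange with explicit host staging. It assumes the `realArr`/`realArrHost` buffers, request arrays, and `mpi_type` named in the text (`sendbuf_l`, `sendbuf_l_cpu`, `recvbuf_l`, `recvbuf_l_cpu`, `req_s`, `req_r`) are already allocated; the message tags are illustrative, the device-side pack/unpack kernels are elided, and this is a fragment of the pattern rather than the repository's exact routine.

```C++
// Sketch of the non-GPU-aware path: stage YAKL device buffers through host
// copies around the MPI calls. Buffer names, req_r/req_s, and mpi_type follow
// the text above; the parallel_for pack/unpack kernels are elided.

// Prepost receives into the *host* buffers
MPI_Irecv(recvbuf_l_cpu.data(), hs*nz*NUM_VARS, mpi_type, left_rank , 0, MPI_COMM_WORLD, &req_r[0]);
MPI_Irecv(recvbuf_r_cpu.data(), hs*nz*NUM_VARS, mpi_type, right_rank, 1, MPI_COMM_WORLD, &req_r[1]);

// ... pack sendbuf_l / sendbuf_r on the device with parallel_for ...

// Device -> host just before the sends
// (invokes cudaMemcpy(...,cudaMemcpyDeviceToHost) under the hood)
sendbuf_l.deep_copy_to(sendbuf_l_cpu);
sendbuf_r.deep_copy_to(sendbuf_r_cpu);
yakl::fence();

MPI_Isend(sendbuf_l_cpu.data(), hs*nz*NUM_VARS, mpi_type, left_rank , 1, MPI_COMM_WORLD, &req_s[0]);
MPI_Isend(sendbuf_r_cpu.data(), hs*nz*NUM_VARS, mpi_type, right_rank, 0, MPI_COMM_WORLD, &req_s[1]);

// Host -> device just after the receives complete
// (invokes cudaMemcpy(...,cudaMemcpyHostToDevice) under the hood)
MPI_Waitall(2, req_r, MPI_STATUSES_IGNORE);
recvbuf_l_cpu.deep_copy_to(recvbuf_l);
recvbuf_r_cpu.deep_copy_to(recvbuf_r);
yakl::fence();

// ... unpack recvbuf_l / recvbuf_r on the device with parallel_for,
//     then MPI_Waitall on req_s before reusing the send buffers ...
```

The key constraint is ordering: the device-to-host copy must complete (hence the `yakl::fence()`) before `MPI_Isend()` reads the host buffer, and the host-to-device copy of the receive buffers can only start after `MPI_Waitall()` on `req_r` returns.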
# Numerical Experiments -A number of numerical experiments are in the code for you to play around with. You can set these by changing the `data_spec_int` variable. +A number of numerical experiments are in the code for you to play around with. You can set these by changing the `data_spec_int` variable. ## Rising Thermal @@ -578,7 +578,7 @@ data_spec_int = DATA_SPEC_INJECTION sim_time = 1200 ``` -A narrow jet of fast and slightly cold wind is injected into a balanced, neutral atmosphere at rest from the left domain near the model top. This has nothing to do with atmospheric flows. It's just here for looks. +A narrow jet of fast and slightly cold wind is injected into a balanced, neutral atmosphere at rest from the left domain near the model top. This has nothing to do with atmospheric flows. It's just here for looks. Potential Temperature after 300 seconds: @@ -626,7 +626,7 @@ This equation is solved using dimensional splitting for simplicity and speed. Th -Each time step, the order in which the dimensions are solved is reversed, giving second-order accuracy overall. +Each time step, the order in which the dimensions are solved is reversed, giving second-order accuracy overall. ## Finite-Volume Spatial Discretization @@ -712,7 +712,7 @@ The reason you have to go to all of this trouble is because of chaotic amplifica
Click here to expand python script

- + ```python import netCDF4 import sys @@ -793,7 +793,7 @@ for v in nc1.variables.keys() : * Directives-Based Approaches * https://github.com/mrnorman/miniWeather/wiki/A-Practical-Introduction-to-GPU-Refactoring-in-Fortran-with-Directives-for-Climate - * https://www.openacc.org + * https://www.openacc.org * https://www.openacc.org/sites/default/files/inline-files/OpenACC%20API%202.6%20Reference%20Guide.pdf * https://www.openmp.org * https://www.openmp.org/wp-content/uploads/OpenMP-4.5-1115-CPP-web.pdf diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 6bf8b8c9..202511ba 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -90,13 +90,13 @@ foreach(VARIANT ${VARIANTS}) add_executable(${BIN_NAME} ${SRC_NAME}) set_target_properties(${BIN_NAME} PROPERTIES COMPILE_FLAGS ${EXE_DEFS}) target_link_libraries(${BIN_NAME} PUBLIC ${PUBLIC_DEPS}) - + #Test add_executable(${BIN_TEST_NAME} ${SRC_NAME}) set_target_properties(${BIN_TEST_NAME} PROPERTIES COMPILE_FLAGS ${TEST_DEFS}) target_link_libraries(${BIN_TEST_NAME} PUBLIC ${PUBLIC_DEPS}) - + #check_output - add_test(NAME ${BIN_TEST_NAME} COMMAND ./scripts/check_output.sh ./${BIN_TEST_NAME} 1e-13 4.5e-5 ) + add_test(NAME ${BIN_TEST_NAME} COMMAND ./scripts/check_output.sh ./${BIN_TEST_NAME} 1e-13 4.5e-5 ) endforeach() diff --git a/cpp/cmake/utils.cmake b/cpp/cmake/utils.cmake index 602b7d93..d28b8d75 100644 --- a/cpp/cmake/utils.cmake +++ b/cpp/cmake/utils.cmake @@ -16,7 +16,7 @@ if( ${_ind} GREATER -1 ) endif() if (${DO_OPENACC}) MESSAGE(STATUS "*** Compiling OpenACC code with ${CMAKE_CXX_COMPILER_ID} ${CMAKE_CXX_COMPILER_VERSION} using the flags: ${OPENACC_FLAGS}") -else() +else() MESSAGE(STATUS "*** OpenACC code will not be compiled.") endif() endmacro(determine_openacc) @@ -40,7 +40,7 @@ if( ${_ind} GREATER -1 ) endif() if (${DO_OPENMP45}) MESSAGE(STATUS "*** Compiling OpenMP45 code with ${CMAKE_CXX_COMPILER_ID} ${CMAKE_CXX_COMPILER_VERSION} using the flags: ${OPENMP45_FLAGS}") -else() +else() MESSAGE(STATUS "*** OpenMP4.5 code will not be compiled.") endif() endmacro(determine_openmp45) diff --git a/cpp/miniWeather_mpi.cpp b/cpp/miniWeather_mpi.cpp index 67907f59..d7566e12 100644 --- a/cpp/miniWeather_mpi.cpp +++ b/cpp/miniWeather_mpi.cpp @@ -2,413 +2,511 @@ ////////////////////////////////////////////////////////////////////////////////////////// // miniWeather // Author: Matt Norman , Oak Ridge National Laboratory -// This code simulates dry, stratified, compressible, non-hydrostatic fluid flows -// For documentation, please see the attached documentation in the "documentation" folder +// This code simulates dry, stratified, compressible, non-hydrostatic fluid +// flows For documentation, please see the attached documentation in the +// "documentation" folder // ////////////////////////////////////////////////////////////////////////////////////////// -#include -#include -#include +#include "pnetcdf.h" +#include #include #include +#include #include -#include "pnetcdf.h" -#include +#include +#include -constexpr double pi = 3.14159265358979323846264338327; //Pi -constexpr double grav = 9.8; //Gravitational acceleration (m / s^2) -constexpr double cp = 1004.; //Specific heat of dry air at constant pressure -constexpr double cv = 717.; //Specific heat of dry air at constant volume -constexpr double rd = 287.; //Dry air constant for equation of state (P=rho*rd*T) -constexpr double p0 = 1.e5; //Standard pressure at the surface in Pascals -constexpr double C0 = 27.5629410929725921310572974482; //Constant to translate potential temperature 
into pressure (P=C0*(rho*theta)**gamma) -constexpr double gamm = 1.40027894002789400278940027894; //gamma=cp/Rd , have to call this gamm because "gamma" is taken (I hate C so much) - -//Define domain and stability-related constants -constexpr double xlen = 2.e4; //Length of the domain in the x-direction (meters) -constexpr double zlen = 1.e4; //Length of the domain in the z-direction (meters) -constexpr double hv_beta = 0.05; //How strong to diffuse the solution: hv_beta \in [0:1] -constexpr double cfl = 1.50; //"Courant, Friedrichs, Lewy" number (for numerical stability) -constexpr double max_speed = 450; //Assumed maximum wave speed during the simulation (speed of sound + speed of wind) (meter / sec) -constexpr int hs = 2; //"Halo" size: number of cells beyond the MPI tasks's domain needed for a full "stencil" of information for reconstruction -constexpr int sten_size = 4; //Size of the stencil used for interpolation - -//Parameters for indexing and flags -constexpr int NUM_VARS = 4; //Number of fluid state variables -constexpr int ID_DENS = 0; //index for density ("rho") -constexpr int ID_UMOM = 1; //index for momentum in the x-direction ("rho * u") -constexpr int ID_WMOM = 2; //index for momentum in the z-direction ("rho * w") -constexpr int ID_RHOT = 3; //index for density * potential temperature ("rho * theta") -constexpr int DIR_X = 1; //Integer constant to express that this operation is in the x-direction -constexpr int DIR_Z = 2; //Integer constant to express that this operation is in the z-direction -constexpr int DATA_SPEC_COLLISION = 1; -constexpr int DATA_SPEC_THERMAL = 2; -constexpr int DATA_SPEC_GRAVITY_WAVES = 3; +constexpr double pi = 3.14159265358979323846264338327; // Pi +constexpr double grav = 9.8; // Gravitational acceleration (m / s^2) +constexpr double cp = 1004.; // Specific heat of dry air at constant pressure +constexpr double cv = 717.; // Specific heat of dry air at constant volume +constexpr double rd = + 287.; // Dry air constant for equation of state (P=rho*rd*T) +constexpr double p0 = 1.e5; // Standard pressure at the surface in Pascals +constexpr double C0 = + 27.5629410929725921310572974482; // Constant to translate potential + // temperature into pressure + // (P=C0*(rho*theta)**gamma) +constexpr double gamm = + 1.40027894002789400278940027894; // gamma=cp/Rd , have to call this gamm + // because "gamma" is taken (I hate C so + // much) + +// Define domain and stability-related constants +constexpr double xlen = 2.e4; // Length of the domain in the x-direction + // (meters) +constexpr double zlen = 1.e4; // Length of the domain in the z-direction + // (meters) +constexpr double hv_beta = + 0.05; // How strong to diffuse the solution: hv_beta \in [0:1] +constexpr double cfl = + 1.50; //"Courant, Friedrichs, Lewy" number (for numerical stability) +constexpr double max_speed = + 450; // Assumed maximum wave speed during the simulation (speed of sound + + // speed of wind) (meter / sec) +constexpr int hs = + 2; //"Halo" size: number of cells beyond the MPI tasks's domain needed for a + // full "stencil" of information for reconstruction +constexpr int sten_size = 4; // Size of the stencil used for interpolation + +// Parameters for indexing and flags +constexpr int NUM_VARS = 4; // Number of fluid state variables +constexpr int ID_DENS = 0; // index for density ("rho") +constexpr int ID_UMOM = 1; // index for momentum in the x-direction ("rho * u") +constexpr int ID_WMOM = 2; // index for momentum in the z-direction ("rho * w") +constexpr int ID_RHOT = + 3; 
// index for density * potential temperature ("rho * theta") +constexpr int DIR_X = + 1; // Integer constant to express that this operation is in the x-direction +constexpr int DIR_Z = + 2; // Integer constant to express that this operation is in the z-direction +constexpr int DATA_SPEC_COLLISION = 1; +constexpr int DATA_SPEC_THERMAL = 2; +constexpr int DATA_SPEC_GRAVITY_WAVES = 3; constexpr int DATA_SPEC_DENSITY_CURRENT = 5; -constexpr int DATA_SPEC_INJECTION = 6; +constexpr int DATA_SPEC_INJECTION = 6; constexpr int nqpoints = 3; -constexpr double qpoints [] = { 0.112701665379258311482073460022E0 , 0.500000000000000000000000000000E0 , 0.887298334620741688517926539980E0 }; -constexpr double qweights[] = { 0.277777777777777777777777777779E0 , 0.444444444444444444444444444444E0 , 0.277777777777777777777777777779E0 }; +constexpr double qpoints[] = {0.112701665379258311482073460022E0, + 0.500000000000000000000000000000E0, + 0.887298334620741688517926539980E0}; +constexpr double qweights[] = {0.277777777777777777777777777779E0, + 0.444444444444444444444444444444E0, + 0.277777777777777777777777777779E0}; /////////////////////////////////////////////////////////////////////////////////////// // BEGIN USER-CONFIGURABLE PARAMETERS /////////////////////////////////////////////////////////////////////////////////////// -//The x-direction length is twice as long as the z-direction length -//So, you'll want to have nx_glob be twice as large as nz_glob -int constexpr nx_glob = _NX; //Number of total cells in the x-direction -int constexpr nz_glob = _NZ; //Number of total cells in the z-direction -double constexpr sim_time = _SIM_TIME; //How many seconds to run the simulation -double constexpr output_freq = _OUT_FREQ; //How frequently to output data to file (in seconds) -int constexpr data_spec_int = _DATA_SPEC; //How to initialize the data -double constexpr dx = xlen / nx_glob; // grid spacing in the x-direction -double constexpr dz = zlen / nz_glob; // grid spacing in the x-direction +// The x-direction length is twice as long as the z-direction length +// So, you'll want to have nx_glob be twice as large as nz_glob +int constexpr nx_glob = _NX; // Number of total cells in the x-direction +int constexpr nz_glob = _NZ; // Number of total cells in the z-direction +double constexpr sim_time = _SIM_TIME; // How many seconds to run the simulation +double constexpr output_freq = + _OUT_FREQ; // How frequently to output data to file (in seconds) +int constexpr data_spec_int = _DATA_SPEC; // How to initialize the data +double constexpr dx = xlen / nx_glob; // grid spacing in the x-direction +double constexpr dz = zlen / nz_glob; // grid spacing in the x-direction /////////////////////////////////////////////////////////////////////////////////////// // END USER-CONFIGURABLE PARAMETERS /////////////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////////////// -// Variables that are initialized but remain static over the course of the simulation +// Variables that are initialized but remain static over the course of the +// simulation /////////////////////////////////////////////////////////////////////////////////////// -double dt; //Model time step (seconds) -int nx, nz; //Number of local grid cells in the x- and z- dimensions for this MPI task -int i_beg, k_beg; //beginning index in the x- and z-directions for this MPI task -int nranks, myrank; //Number of MPI ranks and my rank id -int left_rank, right_rank; 
//MPI Rank IDs that exist to my left and right in the global domain -int mainproc; //Am I the main process (rank == 0)? -double *hy_dens_cell; //hydrostatic density (vert cell avgs). Dimensions: (1-hs:nz+hs) -double *hy_dens_theta_cell; //hydrostatic rho*t (vert cell avgs). Dimensions: (1-hs:nz+hs) -double *hy_dens_int; //hydrostatic density (vert cell interf). Dimensions: (1:nz+1) -double *hy_dens_theta_int; //hydrostatic rho*t (vert cell interf). Dimensions: (1:nz+1) -double *hy_pressure_int; //hydrostatic press (vert cell interf). Dimensions: (1:nz+1) +double dt; // Model time step (seconds) +int nx, nz; // Number of local grid cells in the x- and z- dimensions for this + // MPI task +int i_beg, k_beg; // beginning index in the x- and z-directions for this MPI + // task +int nranks, myrank; // Number of MPI ranks and my rank id +int left_rank, right_rank; // MPI Rank IDs that exist to my left and right in + // the global domain +int mainproc; // Am I the main process (rank == 0)? +double *hy_dens_cell; // hydrostatic density (vert cell avgs). Dimensions: + // (1-hs:nz+hs) +double *hy_dens_theta_cell; // hydrostatic rho*t (vert cell avgs). Dimensions: + // (1-hs:nz+hs) +double * + hy_dens_int; // hydrostatic density (vert cell interf). Dimensions: (1:nz+1) +double *hy_dens_theta_int; // hydrostatic rho*t (vert cell interf). Dimensions: + // (1:nz+1) +double *hy_pressure_int; // hydrostatic press (vert cell interf). Dimensions: + // (1:nz+1) /////////////////////////////////////////////////////////////////////////////////////// // Variables that are dynamics over the course of the simulation /////////////////////////////////////////////////////////////////////////////////////// -double etime; //Elapsed model time -double output_counter; //Helps determine when it's time to do output -//Runtime variable arrays -double *state; //Fluid state. Dimensions: (1-hs:nx+hs,1-hs:nz+hs,NUM_VARS) -double *state_tmp; //Fluid state. Dimensions: (1-hs:nx+hs,1-hs:nz+hs,NUM_VARS) -double *flux; //Cell interface fluxes. Dimensions: (nx+1,nz+1,NUM_VARS) -double *tend; //Fluid state tendencies. Dimensions: (nx,nz,NUM_VARS) -double *sendbuf_l; //Buffer to send data to the left MPI rank -double *sendbuf_r; //Buffer to send data to the right MPI rank -double *recvbuf_l; //Buffer to receive data from the left MPI rank -double *recvbuf_r; //Buffer to receive data from the right MPI rank -int num_out = 0; //The number of outputs performed so far -int direction_switch = 1; -double mass0, te0; //Initial domain totals for mass and total energy -double mass , te ; //Domain totals for mass and total energy - -//How is this not in the standard?! 
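The "Dimensions:" comments above describe Fortran-style extents such as (1-hs:nx+hs, 1-hs:nz+hs, NUM_VARS) that are stored in flat 1-D allocations. A minimal sketch of how such a layout can be addressed, assuming variable-major ordering with hs halo cells on each side of the interior (the helper name idx_state is illustrative and not part of the source):

    #include <cstddef>

    // Illustrative only: map (variable ll, interior cell k, i) into a flat
    // state array laid out as NUM_VARS slabs of (nz + 2*hs) x (nx + 2*hs)
    // cells, where the interior indices run over [0, nz) x [0, nx).
    inline std::size_t idx_state(int ll, int k, int i, int nx, int nz, int hs) {
      return static_cast<std::size_t>(ll) * (nz + 2 * hs) * (nx + 2 * hs) +
             static_cast<std::size_t>(k + hs) * (nx + 2 * hs) +
             static_cast<std::size_t>(i + hs);
    }

    // Example (names taken from the surrounding file):
    //   double rho = state[idx_state(ID_DENS, 0, 0, nx, nz, hs)];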
-double dmin( double a , double b ) { if (a= 0) output(state,etime); + // Output the initial state + if (output_freq >= 0) + output(state, etime); //////////////////////////////////////////////////// // MAIN TIME STEP LOOP //////////////////////////////////////////////////// auto t1 = std::chrono::steady_clock::now(); while (etime < sim_time) { - //If the time step leads to exceeding the simulation time, shorten it for the last step - if (etime + dt > sim_time) { dt = sim_time - etime; } - //Perform a single time step - perform_timestep(state,state_tmp,flux,tend,dt); - //Inform the user + // If the time step leads to exceeding the simulation time, shorten it for + // the last step + if (etime + dt > sim_time) { + dt = sim_time - etime; + } + // Perform a single time step + perform_timestep(state, state_tmp, flux, tend, dt); + // Inform the user #ifndef NO_INFORM - if (mainproc) { printf( "Elapsed Time: %lf / %lf\n", etime , sim_time ); } + if (mainproc) { + printf("Elapsed Time: %lf / %lf\n", etime, sim_time); + } #endif - //Update the elapsed time and output counter + // Update the elapsed time and output counter etime = etime + dt; output_counter = output_counter + dt; - //If it's time for output, reset the counter, and do output + // If it's time for output, reset the counter, and do output if (output_freq >= 0 && output_counter >= output_freq) { output_counter = output_counter - output_freq; - output(state,etime); + output(state, etime); } } auto t2 = std::chrono::steady_clock::now(); if (mainproc) { - std::cout << "CPU Time: " << std::chrono::duration(t2-t1).count() << " sec\n"; + std::cout << "CPU Time: " << std::chrono::duration(t2 - t1).count() + << " sec\n"; } - //Final reductions for mass, kinetic energy, and total energy - reductions(mass,te); + // Final reductions for mass, kinetic energy, and total energy + reductions(mass, te); if (mainproc) { - printf( "d_mass: %le\n" , (mass - mass0)/mass0 ); - printf( "d_te: %le\n" , (te - te0 )/te0 ); + printf("d_mass: %le\n", (mass - mass0) / mass0); + printf("d_te: %le\n", (te - te0) / te0); } finalize(); } - -//Performs a single dimensionally split time step using a simple low-storage three-stage Runge-Kutta time integrator -//The dimensional splitting is a second-order-accurate alternating Strang splitting in which the -//order of directions is alternated each time step. -//The Runge-Kutta method used here is defined as follows: -// q* = q[n] + dt/3 * rhs(q[n]) -// q** = q[n] + dt/2 * rhs(q* ) -// q[n+1] = q[n] + dt/1 * rhs(q** ) -void perform_timestep( double *state , double *state_tmp , double *flux , double *tend , double dt ) { +// Performs a single dimensionally split time step using a simple low-storage +// three-stage Runge-Kutta time integrator The dimensional splitting is a +// second-order-accurate alternating Strang splitting in which the order of +// directions is alternated each time step. 
The Runge-Kutta method used here is +// defined as follows: +// q* = q[n] + dt/3 * rhs(q[n]) +// q** = q[n] + dt/2 * rhs(q* ) +// q[n+1] = q[n] + dt/1 * rhs(q** ) +void perform_timestep(double *state, double *state_tmp, double *flux, + double *tend, double dt) { if (direction_switch) { - //x-direction first - semi_discrete_step( state , state , state_tmp , dt / 3 , DIR_X , flux , tend ); - semi_discrete_step( state , state_tmp , state_tmp , dt / 2 , DIR_X , flux , tend ); - semi_discrete_step( state , state_tmp , state , dt / 1 , DIR_X , flux , tend ); - //z-direction second - semi_discrete_step( state , state , state_tmp , dt / 3 , DIR_Z , flux , tend ); - semi_discrete_step( state , state_tmp , state_tmp , dt / 2 , DIR_Z , flux , tend ); - semi_discrete_step( state , state_tmp , state , dt / 1 , DIR_Z , flux , tend ); + // x-direction first + semi_discrete_step(state, state, state_tmp, dt / 3, DIR_X, flux, tend); + semi_discrete_step(state, state_tmp, state_tmp, dt / 2, DIR_X, flux, tend); + semi_discrete_step(state, state_tmp, state, dt / 1, DIR_X, flux, tend); + // z-direction second + semi_discrete_step(state, state, state_tmp, dt / 3, DIR_Z, flux, tend); + semi_discrete_step(state, state_tmp, state_tmp, dt / 2, DIR_Z, flux, tend); + semi_discrete_step(state, state_tmp, state, dt / 1, DIR_Z, flux, tend); } else { - //z-direction second - semi_discrete_step( state , state , state_tmp , dt / 3 , DIR_Z , flux , tend ); - semi_discrete_step( state , state_tmp , state_tmp , dt / 2 , DIR_Z , flux , tend ); - semi_discrete_step( state , state_tmp , state , dt / 1 , DIR_Z , flux , tend ); - //x-direction first - semi_discrete_step( state , state , state_tmp , dt / 3 , DIR_X , flux , tend ); - semi_discrete_step( state , state_tmp , state_tmp , dt / 2 , DIR_X , flux , tend ); - semi_discrete_step( state , state_tmp , state , dt / 1 , DIR_X , flux , tend ); + // z-direction second + semi_discrete_step(state, state, state_tmp, dt / 3, DIR_Z, flux, tend); + semi_discrete_step(state, state_tmp, state_tmp, dt / 2, DIR_Z, flux, tend); + semi_discrete_step(state, state_tmp, state, dt / 1, DIR_Z, flux, tend); + // x-direction first + semi_discrete_step(state, state, state_tmp, dt / 3, DIR_X, flux, tend); + semi_discrete_step(state, state_tmp, state_tmp, dt / 2, DIR_X, flux, tend); + semi_discrete_step(state, state_tmp, state, dt / 1, DIR_X, flux, tend); + } + if (direction_switch) { + direction_switch = 0; + } else { + direction_switch = 1; } - if (direction_switch) { direction_switch = 0; } else { direction_switch = 1; } } - -//Perform a single semi-discretized step in time with the form: -//state_out = state_init + dt * rhs(state_forcing) -//Meaning the step starts from state_init, computes the rhs using state_forcing, and stores the result in state_out -void semi_discrete_step( double *state_init , double *state_forcing , double *state_out , double dt , int dir , double *flux , double *tend ) { +// Perform a single semi-discretized step in time with the form: +// state_out = state_init + dt * rhs(state_forcing) +// Meaning the step starts from state_init, computes the rhs using +// state_forcing, and stores the result in state_out +void semi_discrete_step(double *state_init, double *state_forcing, + double *state_out, double dt, int dir, double *flux, + double *tend) { int i, k, ll, inds, indt, indw; double x, z, wpert, dist, x0, z0, xrad, zrad, amp; - if (dir == DIR_X) { - //Set the halo values for this MPI task's fluid state in the x-direction + if (dir == DIR_X) { + // Set the halo values for 
this MPI task's fluid state in the x-direction set_halo_values_x(state_forcing); - //Compute the time tendencies for the fluid state in the x-direction - compute_tendencies_x(state_forcing,flux,tend,dt); + // Compute the time tendencies for the fluid state in the x-direction + compute_tendencies_x(state_forcing, flux, tend, dt); } else if (dir == DIR_Z) { - //Set the halo values for this MPI task's fluid state in the z-direction + // Set the halo values for this MPI task's fluid state in the z-direction set_halo_values_z(state_forcing); - //Compute the time tendencies for the fluid state in the z-direction - compute_tendencies_z(state_forcing,flux,tend,dt); + // Compute the time tendencies for the fluid state in the z-direction + compute_tendencies_z(state_forcing, flux, tend, dt); } ///////////////////////////////////////////////// // TODO: THREAD ME ///////////////////////////////////////////////// - //Apply the tendencies to the fluid state - for (ll=0; ll , Oak Ridge National Laboratory -// This code simulates dry, stratified, compressible, non-hydrostatic fluid flows -// For documentation, please see the attached documentation in the "documentation" folder +// This code simulates dry, stratified, compressible, non-hydrostatic fluid +// flows For documentation, please see the attached documentation in the +// "documentation" folder // ////////////////////////////////////////////////////////////////////////////////////////// -#include -#include -#include +#include "pnetcdf.h" +#include #include #include +#include #include -#include "pnetcdf.h" -#include +#include +#include -constexpr double pi = 3.14159265358979323846264338327; //Pi -constexpr double grav = 9.8; //Gravitational acceleration (m / s^2) -constexpr double cp = 1004.; //Specific heat of dry air at constant pressure -constexpr double cv = 717.; //Specific heat of dry air at constant volume -constexpr double rd = 287.; //Dry air constant for equation of state (P=rho*rd*T) -constexpr double p0 = 1.e5; //Standard pressure at the surface in Pascals -constexpr double C0 = 27.5629410929725921310572974482; //Constant to translate potential temperature into pressure (P=C0*(rho*theta)**gamma) -constexpr double gamm = 1.40027894002789400278940027894; //gamma=cp/Rd , have to call this gamm because "gamma" is taken (I hate C so much) - -//Define domain and stability-related constants -constexpr double xlen = 2.e4; //Length of the domain in the x-direction (meters) -constexpr double zlen = 1.e4; //Length of the domain in the z-direction (meters) -constexpr double hv_beta = 0.05; //How strong to diffuse the solution: hv_beta \in [0:1] -constexpr double cfl = 1.50; //"Courant, Friedrichs, Lewy" number (for numerical stability) -constexpr double max_speed = 450; //Assumed maximum wave speed during the simulation (speed of sound + speed of wind) (meter / sec) -constexpr int hs = 2; //"Halo" size: number of cells beyond the MPI tasks's domain needed for a full "stencil" of information for reconstruction -constexpr int sten_size = 4; //Size of the stencil used for interpolation - -//Parameters for indexing and flags -constexpr int NUM_VARS = 4; //Number of fluid state variables -constexpr int ID_DENS = 0; //index for density ("rho") -constexpr int ID_UMOM = 1; //index for momentum in the x-direction ("rho * u") -constexpr int ID_WMOM = 2; //index for momentum in the z-direction ("rho * w") -constexpr int ID_RHOT = 3; //index for density * potential temperature ("rho * theta") -constexpr int DIR_X = 1; //Integer constant to express that this 
operation is in the x-direction -constexpr int DIR_Z = 2; //Integer constant to express that this operation is in the z-direction -constexpr int DATA_SPEC_COLLISION = 1; -constexpr int DATA_SPEC_THERMAL = 2; -constexpr int DATA_SPEC_GRAVITY_WAVES = 3; +constexpr double pi = 3.14159265358979323846264338327; // Pi +constexpr double grav = 9.8; // Gravitational acceleration (m / s^2) +constexpr double cp = 1004.; // Specific heat of dry air at constant pressure +constexpr double cv = 717.; // Specific heat of dry air at constant volume +constexpr double rd = + 287.; // Dry air constant for equation of state (P=rho*rd*T) +constexpr double p0 = 1.e5; // Standard pressure at the surface in Pascals +constexpr double C0 = + 27.5629410929725921310572974482; // Constant to translate potential + // temperature into pressure + // (P=C0*(rho*theta)**gamma) +constexpr double gamm = + 1.40027894002789400278940027894; // gamma=cp/Rd , have to call this gamm + // because "gamma" is taken (I hate C so + // much) + +// Define domain and stability-related constants +constexpr double xlen = 2.e4; // Length of the domain in the x-direction + // (meters) +constexpr double zlen = 1.e4; // Length of the domain in the z-direction + // (meters) +constexpr double hv_beta = + 0.05; // How strong to diffuse the solution: hv_beta \in [0:1] +constexpr double cfl = + 1.50; //"Courant, Friedrichs, Lewy" number (for numerical stability) +constexpr double max_speed = + 450; // Assumed maximum wave speed during the simulation (speed of sound + + // speed of wind) (meter / sec) +constexpr int hs = + 2; //"Halo" size: number of cells beyond the MPI tasks's domain needed for a + // full "stencil" of information for reconstruction +constexpr int sten_size = 4; // Size of the stencil used for interpolation + +// Parameters for indexing and flags +constexpr int NUM_VARS = 4; // Number of fluid state variables +constexpr int ID_DENS = 0; // index for density ("rho") +constexpr int ID_UMOM = 1; // index for momentum in the x-direction ("rho * u") +constexpr int ID_WMOM = 2; // index for momentum in the z-direction ("rho * w") +constexpr int ID_RHOT = + 3; // index for density * potential temperature ("rho * theta") +constexpr int DIR_X = + 1; // Integer constant to express that this operation is in the x-direction +constexpr int DIR_Z = + 2; // Integer constant to express that this operation is in the z-direction +constexpr int DATA_SPEC_COLLISION = 1; +constexpr int DATA_SPEC_THERMAL = 2; +constexpr int DATA_SPEC_GRAVITY_WAVES = 3; constexpr int DATA_SPEC_DENSITY_CURRENT = 5; -constexpr int DATA_SPEC_INJECTION = 6; +constexpr int DATA_SPEC_INJECTION = 6; constexpr int nqpoints = 3; -constexpr double qpoints [] = { 0.112701665379258311482073460022E0 , 0.500000000000000000000000000000E0 , 0.887298334620741688517926539980E0 }; -constexpr double qweights[] = { 0.277777777777777777777777777779E0 , 0.444444444444444444444444444444E0 , 0.277777777777777777777777777779E0 }; +constexpr double qpoints[] = {0.112701665379258311482073460022E0, + 0.500000000000000000000000000000E0, + 0.887298334620741688517926539980E0}; +constexpr double qweights[] = {0.277777777777777777777777777779E0, + 0.444444444444444444444444444444E0, + 0.277777777777777777777777777779E0}; /////////////////////////////////////////////////////////////////////////////////////// // BEGIN USER-CONFIGURABLE PARAMETERS /////////////////////////////////////////////////////////////////////////////////////// -//The x-direction length is twice as long as the z-direction length 
-//So, you'll want to have nx_glob be twice as large as nz_glob -int constexpr nx_glob = _NX; //Number of total cells in the x-direction -int constexpr nz_glob = _NZ; //Number of total cells in the z-direction -double constexpr sim_time = _SIM_TIME; //How many seconds to run the simulation -double constexpr output_freq = _OUT_FREQ; //How frequently to output data to file (in seconds) -int constexpr data_spec_int = _DATA_SPEC; //How to initialize the data -double constexpr dx = xlen / nx_glob; // grid spacing in the x-direction -double constexpr dz = zlen / nz_glob; // grid spacing in the x-direction +// The x-direction length is twice as long as the z-direction length +// So, you'll want to have nx_glob be twice as large as nz_glob +int constexpr nx_glob = _NX; // Number of total cells in the x-direction +int constexpr nz_glob = _NZ; // Number of total cells in the z-direction +double constexpr sim_time = _SIM_TIME; // How many seconds to run the simulation +double constexpr output_freq = + _OUT_FREQ; // How frequently to output data to file (in seconds) +int constexpr data_spec_int = _DATA_SPEC; // How to initialize the data +double constexpr dx = xlen / nx_glob; // grid spacing in the x-direction +double constexpr dz = zlen / nz_glob; // grid spacing in the x-direction /////////////////////////////////////////////////////////////////////////////////////// // END USER-CONFIGURABLE PARAMETERS /////////////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////////////// -// Variables that are initialized but remain static over the course of the simulation +// Variables that are initialized but remain static over the course of the +// simulation /////////////////////////////////////////////////////////////////////////////////////// -double dt; //Model time step (seconds) -int nx, nz; //Number of local grid cells in the x- and z- dimensions for this MPI task -int i_beg, k_beg; //beginning index in the x- and z-directions for this MPI task -int nranks, myrank; //Number of MPI ranks and my rank id -int left_rank, right_rank; //MPI Rank IDs that exist to my left and right in the global domain -int mainproc; //Am I the main process (rank == 0)? -double *hy_dens_cell; //hydrostatic density (vert cell avgs). Dimensions: (1-hs:nz+hs) -double *hy_dens_theta_cell; //hydrostatic rho*t (vert cell avgs). Dimensions: (1-hs:nz+hs) -double *hy_dens_int; //hydrostatic density (vert cell interf). Dimensions: (1:nz+1) -double *hy_dens_theta_int; //hydrostatic rho*t (vert cell interf). Dimensions: (1:nz+1) -double *hy_pressure_int; //hydrostatic press (vert cell interf). Dimensions: (1:nz+1) +double dt; // Model time step (seconds) +int nx, nz; // Number of local grid cells in the x- and z- dimensions for this + // MPI task +int i_beg, k_beg; // beginning index in the x- and z-directions for this MPI + // task +int nranks, myrank; // Number of MPI ranks and my rank id +int left_rank, right_rank; // MPI Rank IDs that exist to my left and right in + // the global domain +int mainproc; // Am I the main process (rank == 0)? +double *hy_dens_cell; // hydrostatic density (vert cell avgs). Dimensions: + // (1-hs:nz+hs) +double *hy_dens_theta_cell; // hydrostatic rho*t (vert cell avgs). Dimensions: + // (1-hs:nz+hs) +double * + hy_dens_int; // hydrostatic density (vert cell interf). Dimensions: (1:nz+1) +double *hy_dens_theta_int; // hydrostatic rho*t (vert cell interf). 
Dimensions: + // (1:nz+1) +double *hy_pressure_int; // hydrostatic press (vert cell interf). Dimensions: + // (1:nz+1) /////////////////////////////////////////////////////////////////////////////////////// // Variables that are dynamics over the course of the simulation /////////////////////////////////////////////////////////////////////////////////////// -double etime; //Elapsed model time -double output_counter; //Helps determine when it's time to do output -//Runtime variable arrays -double *state; //Fluid state. Dimensions: (1-hs:nx+hs,1-hs:nz+hs,NUM_VARS) -double *state_tmp; //Fluid state. Dimensions: (1-hs:nx+hs,1-hs:nz+hs,NUM_VARS) -double *flux; //Cell interface fluxes. Dimensions: (nx+1,nz+1,NUM_VARS) -double *tend; //Fluid state tendencies. Dimensions: (nx,nz,NUM_VARS) -double *sendbuf_l; //Buffer to send data to the left MPI rank -double *sendbuf_r; //Buffer to send data to the right MPI rank -double *recvbuf_l; //Buffer to receive data from the left MPI rank -double *recvbuf_r; //Buffer to receive data from the right MPI rank -int num_out = 0; //The number of outputs performed so far -int direction_switch = 1; -double mass0, te0; //Initial domain totals for mass and total energy -double mass , te ; //Domain totals for mass and total energy - -//How is this not in the standard?! -double dmin( double a , double b ) { if (a= 0) output(state,etime); - - //////////////////////////////////////////////////// - // MAIN TIME STEP LOOP - //////////////////////////////////////////////////// + init(&argc, &argv); + +#pragma acc data copyin( \ + state_tmp[0 : (nz + 2 * hs) * (nx + 2 * hs) * NUM_VARS], \ + hy_dens_cell[0 : nz + 2 * hs], hy_dens_theta_cell[0 : nz + 2 * hs], \ + hy_dens_int[0 : nz + 1], hy_dens_theta_int[0 : nz + 1], \ + hy_pressure_int[0 : nz + 1]) \ + create(flux[0 : (nz + 1) * (nx + 1) * NUM_VARS], \ + tend[0 : nz * nx * NUM_VARS], sendbuf_l[0 : hs * nz * NUM_VARS], \ + sendbuf_r[0 : hs * nz * NUM_VARS], \ + recvbuf_l[0 : hs * nz * NUM_VARS], \ + recvbuf_r[0 : hs * nz * NUM_VARS]) \ + copy(state[0 : (nz + 2 * hs) * (nx + 2 * hs) * NUM_VARS]) + { + + // Initial reductions for mass, kinetic energy, and total energy + reductions(mass0, te0); + + // Output the initial state + if (output_freq >= 0) + output(state, etime); + + //////////////////////////////////////////////////// + // MAIN TIME STEP LOOP + //////////////////////////////////////////////////// #pragma acc wait - auto t1 = std::chrono::steady_clock::now(); - while (etime < sim_time) { - //If the time step leads to exceeding the simulation time, shorten it for the last step - if (etime + dt > sim_time) { dt = sim_time - etime; } - //Perform a single time step - perform_timestep(state,state_tmp,flux,tend,dt); - //Inform the user + auto t1 = std::chrono::steady_clock::now(); + while (etime < sim_time) { + // If the time step leads to exceeding the simulation time, shorten it for + // the last step + if (etime + dt > sim_time) { + dt = sim_time - etime; + } + // Perform a single time step + perform_timestep(state, state_tmp, flux, tend, dt); + // Inform the user #ifndef NO_INFORM - if (mainproc) { printf( "Elapsed Time: %lf / %lf\n", etime , sim_time ); } + if (mainproc) { + printf("Elapsed Time: %lf / %lf\n", etime, sim_time); + } #endif - //Update the elapsed time and output counter - etime = etime + dt; - output_counter = output_counter + dt; - //If it's time for output, reset the counter, and do output - if (output_freq >= 0 && output_counter >= output_freq) { - output_counter = output_counter - output_freq; - 
output(state,etime); + // Update the elapsed time and output counter + etime = etime + dt; + output_counter = output_counter + dt; + // If it's time for output, reset the counter, and do output + if (output_freq >= 0 && output_counter >= output_freq) { + output_counter = output_counter - output_freq; + output(state, etime); + } } - } #pragma acc wait - auto t2 = std::chrono::steady_clock::now(); - if (mainproc) { - std::cout << "CPU Time: " << std::chrono::duration(t2-t1).count() << " sec\n"; - } + auto t2 = std::chrono::steady_clock::now(); + if (mainproc) { + std::cout << "CPU Time: " + << std::chrono::duration(t2 - t1).count() << " sec\n"; + } - //Final reductions for mass, kinetic energy, and total energy - reductions(mass,te); -} + // Final reductions for mass, kinetic energy, and total energy + reductions(mass, te); + } if (mainproc) { - printf( "d_mass: %le\n" , (mass - mass0)/mass0 ); - printf( "d_te: %le\n" , (te - te0 )/te0 ); + printf("d_mass: %le\n", (mass - mass0) / mass0); + printf("d_te: %le\n", (te - te0) / te0); } finalize(); } - -//Performs a single dimensionally split time step using a simple low-storage three-stage Runge-Kutta time integrator -//The dimensional splitting is a second-order-accurate alternating Strang splitting in which the -//order of directions is alternated each time step. -//The Runge-Kutta method used here is defined as follows: -// q* = q[n] + dt/3 * rhs(q[n]) -// q** = q[n] + dt/2 * rhs(q* ) -// q[n+1] = q[n] + dt/1 * rhs(q** ) -void perform_timestep( double *state , double *state_tmp , double *flux , double *tend , double dt ) { +// Performs a single dimensionally split time step using a simple low-storage +// three-stage Runge-Kutta time integrator The dimensional splitting is a +// second-order-accurate alternating Strang splitting in which the order of +// directions is alternated each time step. 
The Runge-Kutta method used here is +// defined as follows: +// q* = q[n] + dt/3 * rhs(q[n]) +// q** = q[n] + dt/2 * rhs(q* ) +// q[n+1] = q[n] + dt/1 * rhs(q** ) +void perform_timestep(double *state, double *state_tmp, double *flux, + double *tend, double dt) { if (direction_switch) { - //x-direction first - semi_discrete_step( state , state , state_tmp , dt / 3 , DIR_X , flux , tend ); - semi_discrete_step( state , state_tmp , state_tmp , dt / 2 , DIR_X , flux , tend ); - semi_discrete_step( state , state_tmp , state , dt / 1 , DIR_X , flux , tend ); - //z-direction second - semi_discrete_step( state , state , state_tmp , dt / 3 , DIR_Z , flux , tend ); - semi_discrete_step( state , state_tmp , state_tmp , dt / 2 , DIR_Z , flux , tend ); - semi_discrete_step( state , state_tmp , state , dt / 1 , DIR_Z , flux , tend ); + // x-direction first + semi_discrete_step(state, state, state_tmp, dt / 3, DIR_X, flux, tend); + semi_discrete_step(state, state_tmp, state_tmp, dt / 2, DIR_X, flux, tend); + semi_discrete_step(state, state_tmp, state, dt / 1, DIR_X, flux, tend); + // z-direction second + semi_discrete_step(state, state, state_tmp, dt / 3, DIR_Z, flux, tend); + semi_discrete_step(state, state_tmp, state_tmp, dt / 2, DIR_Z, flux, tend); + semi_discrete_step(state, state_tmp, state, dt / 1, DIR_Z, flux, tend); } else { - //z-direction second - semi_discrete_step( state , state , state_tmp , dt / 3 , DIR_Z , flux , tend ); - semi_discrete_step( state , state_tmp , state_tmp , dt / 2 , DIR_Z , flux , tend ); - semi_discrete_step( state , state_tmp , state , dt / 1 , DIR_Z , flux , tend ); - //x-direction first - semi_discrete_step( state , state , state_tmp , dt / 3 , DIR_X , flux , tend ); - semi_discrete_step( state , state_tmp , state_tmp , dt / 2 , DIR_X , flux , tend ); - semi_discrete_step( state , state_tmp , state , dt / 1 , DIR_X , flux , tend ); + // z-direction second + semi_discrete_step(state, state, state_tmp, dt / 3, DIR_Z, flux, tend); + semi_discrete_step(state, state_tmp, state_tmp, dt / 2, DIR_Z, flux, tend); + semi_discrete_step(state, state_tmp, state, dt / 1, DIR_Z, flux, tend); + // x-direction first + semi_discrete_step(state, state, state_tmp, dt / 3, DIR_X, flux, tend); + semi_discrete_step(state, state_tmp, state_tmp, dt / 2, DIR_X, flux, tend); + semi_discrete_step(state, state_tmp, state, dt / 1, DIR_X, flux, tend); + } + if (direction_switch) { + direction_switch = 0; + } else { + direction_switch = 1; } - if (direction_switch) { direction_switch = 0; } else { direction_switch = 1; } } - -//Perform a single semi-discretized step in time with the form: -//state_out = state_init + dt * rhs(state_forcing) -//Meaning the step starts from state_init, computes the rhs using state_forcing, and stores the result in state_out -void semi_discrete_step( double *state_init , double *state_forcing , double *state_out , double dt , int dir , double *flux , double *tend ) { +// Perform a single semi-discretized step in time with the form: +// state_out = state_init + dt * rhs(state_forcing) +// Meaning the step starts from state_init, computes the rhs using +// state_forcing, and stores the result in state_out +void semi_discrete_step(double *state_init, double *state_forcing, + double *state_out, double dt, int dir, double *flux, + double *tend) { int i, k, ll, inds, indt, indw; double x, z, wpert, dist, x0, z0, xrad, zrad, amp; - if (dir == DIR_X) { - //Set the halo values for this MPI task's fluid state in the x-direction + if (dir == DIR_X) { + // Set the halo values for 
this MPI task's fluid state in the x-direction set_halo_values_x(state_forcing); - //Compute the time tendencies for the fluid state in the x-direction - compute_tendencies_x(state_forcing,flux,tend,dt); + // Compute the time tendencies for the fluid state in the x-direction + compute_tendencies_x(state_forcing, flux, tend, dt); } else if (dir == DIR_Z) { - //Set the halo values for this MPI task's fluid state in the z-direction + // Set the halo values for this MPI task's fluid state in the z-direction set_halo_values_z(state_forcing); - //Compute the time tendencies for the fluid state in the z-direction - compute_tendencies_z(state_forcing,flux,tend,dt); + // Compute the time tendencies for the fluid state in the z-direction + compute_tendencies_z(state_forcing, flux, tend, dt); } - //Apply the tendencies to the fluid state + // Apply the tendencies to the fluid state #pragma acc parallel loop collapse(3) default(present) async - for (ll=0; ll , Oak Ridge National Laboratory -// This code simulates dry, stratified, compressible, non-hydrostatic fluid flows -// For documentation, please see the attached documentation in the "documentation" folder +// This code simulates dry, stratified, compressible, non-hydrostatic fluid +// flows For documentation, please see the attached documentation in the +// "documentation" folder // ////////////////////////////////////////////////////////////////////////////////////////// -#include -#include -#include +#include "pnetcdf.h" +#include #include #include +#include #include -#include "pnetcdf.h" -#include +#include +#include -constexpr double pi = 3.14159265358979323846264338327; //Pi -constexpr double grav = 9.8; //Gravitational acceleration (m / s^2) -constexpr double cp = 1004.; //Specific heat of dry air at constant pressure -constexpr double cv = 717.; //Specific heat of dry air at constant volume -constexpr double rd = 287.; //Dry air constant for equation of state (P=rho*rd*T) -constexpr double p0 = 1.e5; //Standard pressure at the surface in Pascals -constexpr double C0 = 27.5629410929725921310572974482; //Constant to translate potential temperature into pressure (P=C0*(rho*theta)**gamma) -constexpr double gamm = 1.40027894002789400278940027894; //gamma=cp/Rd , have to call this gamm because "gamma" is taken (I hate C so much) - -//Define domain and stability-related constants -constexpr double xlen = 2.e4; //Length of the domain in the x-direction (meters) -constexpr double zlen = 1.e4; //Length of the domain in the z-direction (meters) -constexpr double hv_beta = 0.05; //How strong to diffuse the solution: hv_beta \in [0:1] -constexpr double cfl = 1.50; //"Courant, Friedrichs, Lewy" number (for numerical stability) -constexpr double max_speed = 450; //Assumed maximum wave speed during the simulation (speed of sound + speed of wind) (meter / sec) -constexpr int hs = 2; //"Halo" size: number of cells beyond the MPI tasks's domain needed for a full "stencil" of information for reconstruction -constexpr int sten_size = 4; //Size of the stencil used for interpolation - -//Parameters for indexing and flags -constexpr int NUM_VARS = 4; //Number of fluid state variables -constexpr int ID_DENS = 0; //index for density ("rho") -constexpr int ID_UMOM = 1; //index for momentum in the x-direction ("rho * u") -constexpr int ID_WMOM = 2; //index for momentum in the z-direction ("rho * w") -constexpr int ID_RHOT = 3; //index for density * potential temperature ("rho * theta") -constexpr int DIR_X = 1; //Integer constant to express that this operation is in 
the x-direction -constexpr int DIR_Z = 2; //Integer constant to express that this operation is in the z-direction -constexpr int DATA_SPEC_COLLISION = 1; -constexpr int DATA_SPEC_THERMAL = 2; -constexpr int DATA_SPEC_GRAVITY_WAVES = 3; +constexpr double pi = 3.14159265358979323846264338327; // Pi +constexpr double grav = 9.8; // Gravitational acceleration (m / s^2) +constexpr double cp = 1004.; // Specific heat of dry air at constant pressure +constexpr double cv = 717.; // Specific heat of dry air at constant volume +constexpr double rd = + 287.; // Dry air constant for equation of state (P=rho*rd*T) +constexpr double p0 = 1.e5; // Standard pressure at the surface in Pascals +constexpr double C0 = + 27.5629410929725921310572974482; // Constant to translate potential + // temperature into pressure + // (P=C0*(rho*theta)**gamma) +constexpr double gamm = + 1.40027894002789400278940027894; // gamma=cp/Rd , have to call this gamm + // because "gamma" is taken (I hate C so + // much) + +// Define domain and stability-related constants +constexpr double xlen = 2.e4; // Length of the domain in the x-direction + // (meters) +constexpr double zlen = 1.e4; // Length of the domain in the z-direction + // (meters) +constexpr double hv_beta = + 0.05; // How strong to diffuse the solution: hv_beta \in [0:1] +constexpr double cfl = + 1.50; //"Courant, Friedrichs, Lewy" number (for numerical stability) +constexpr double max_speed = + 450; // Assumed maximum wave speed during the simulation (speed of sound + + // speed of wind) (meter / sec) +constexpr int hs = + 2; //"Halo" size: number of cells beyond the MPI tasks's domain needed for a + // full "stencil" of information for reconstruction +constexpr int sten_size = 4; // Size of the stencil used for interpolation + +// Parameters for indexing and flags +constexpr int NUM_VARS = 4; // Number of fluid state variables +constexpr int ID_DENS = 0; // index for density ("rho") +constexpr int ID_UMOM = 1; // index for momentum in the x-direction ("rho * u") +constexpr int ID_WMOM = 2; // index for momentum in the z-direction ("rho * w") +constexpr int ID_RHOT = + 3; // index for density * potential temperature ("rho * theta") +constexpr int DIR_X = + 1; // Integer constant to express that this operation is in the x-direction +constexpr int DIR_Z = + 2; // Integer constant to express that this operation is in the z-direction +constexpr int DATA_SPEC_COLLISION = 1; +constexpr int DATA_SPEC_THERMAL = 2; +constexpr int DATA_SPEC_GRAVITY_WAVES = 3; constexpr int DATA_SPEC_DENSITY_CURRENT = 5; -constexpr int DATA_SPEC_INJECTION = 6; +constexpr int DATA_SPEC_INJECTION = 6; constexpr int nqpoints = 3; -constexpr double qpoints [] = { 0.112701665379258311482073460022E0 , 0.500000000000000000000000000000E0 , 0.887298334620741688517926539980E0 }; -constexpr double qweights[] = { 0.277777777777777777777777777779E0 , 0.444444444444444444444444444444E0 , 0.277777777777777777777777777779E0 }; +constexpr double qpoints[] = {0.112701665379258311482073460022E0, + 0.500000000000000000000000000000E0, + 0.887298334620741688517926539980E0}; +constexpr double qweights[] = {0.277777777777777777777777777779E0, + 0.444444444444444444444444444444E0, + 0.277777777777777777777777777779E0}; /////////////////////////////////////////////////////////////////////////////////////// // BEGIN USER-CONFIGURABLE PARAMETERS /////////////////////////////////////////////////////////////////////////////////////// -//The x-direction length is twice as long as the z-direction length -//So, you'll want to 
have nx_glob be twice as large as nz_glob -int constexpr nx_glob = _NX; //Number of total cells in the x-direction -int constexpr nz_glob = _NZ; //Number of total cells in the z-direction -double constexpr sim_time = _SIM_TIME; //How many seconds to run the simulation -double constexpr output_freq = _OUT_FREQ; //How frequently to output data to file (in seconds) -int constexpr data_spec_int = _DATA_SPEC; //How to initialize the data -double constexpr dx = xlen / nx_glob; // grid spacing in the x-direction -double constexpr dz = zlen / nz_glob; // grid spacing in the x-direction +// The x-direction length is twice as long as the z-direction length +// So, you'll want to have nx_glob be twice as large as nz_glob +int constexpr nx_glob = _NX; // Number of total cells in the x-direction +int constexpr nz_glob = _NZ; // Number of total cells in the z-direction +double constexpr sim_time = _SIM_TIME; // How many seconds to run the simulation +double constexpr output_freq = + _OUT_FREQ; // How frequently to output data to file (in seconds) +int constexpr data_spec_int = _DATA_SPEC; // How to initialize the data +double constexpr dx = xlen / nx_glob; // grid spacing in the x-direction +double constexpr dz = zlen / nz_glob; // grid spacing in the x-direction /////////////////////////////////////////////////////////////////////////////////////// // END USER-CONFIGURABLE PARAMETERS /////////////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////////////// -// Variables that are initialized but remain static over the course of the simulation +// Variables that are initialized but remain static over the course of the +// simulation /////////////////////////////////////////////////////////////////////////////////////// -double dt; //Model time step (seconds) -int nx, nz; //Number of local grid cells in the x- and z- dimensions for this MPI task -int i_beg, k_beg; //beginning index in the x- and z-directions for this MPI task -int nranks, myrank; //Number of MPI ranks and my rank id -int left_rank, right_rank; //MPI Rank IDs that exist to my left and right in the global domain -int mainproc; //Am I the main process (rank == 0)? -double *hy_dens_cell; //hydrostatic density (vert cell avgs). Dimensions: (1-hs:nz+hs) -double *hy_dens_theta_cell; //hydrostatic rho*t (vert cell avgs). Dimensions: (1-hs:nz+hs) -double *hy_dens_int; //hydrostatic density (vert cell interf). Dimensions: (1:nz+1) -double *hy_dens_theta_int; //hydrostatic rho*t (vert cell interf). Dimensions: (1:nz+1) -double *hy_pressure_int; //hydrostatic press (vert cell interf). Dimensions: (1:nz+1) +double dt; // Model time step (seconds) +int nx, nz; // Number of local grid cells in the x- and z- dimensions for this + // MPI task +int i_beg, k_beg; // beginning index in the x- and z-directions for this MPI + // task +int nranks, myrank; // Number of MPI ranks and my rank id +int left_rank, right_rank; // MPI Rank IDs that exist to my left and right in + // the global domain +int mainproc; // Am I the main process (rank == 0)? +double *hy_dens_cell; // hydrostatic density (vert cell avgs). Dimensions: + // (1-hs:nz+hs) +double *hy_dens_theta_cell; // hydrostatic rho*t (vert cell avgs). Dimensions: + // (1-hs:nz+hs) +double * + hy_dens_int; // hydrostatic density (vert cell interf). Dimensions: (1:nz+1) +double *hy_dens_theta_int; // hydrostatic rho*t (vert cell interf). 
Dimensions: + // (1:nz+1) +double *hy_pressure_int; // hydrostatic press (vert cell interf). Dimensions: + // (1:nz+1) /////////////////////////////////////////////////////////////////////////////////////// // Variables that are dynamics over the course of the simulation /////////////////////////////////////////////////////////////////////////////////////// -double etime; //Elapsed model time -double output_counter; //Helps determine when it's time to do output -//Runtime variable arrays -double *state; //Fluid state. Dimensions: (1-hs:nx+hs,1-hs:nz+hs,NUM_VARS) -double *state_tmp; //Fluid state. Dimensions: (1-hs:nx+hs,1-hs:nz+hs,NUM_VARS) -double *flux; //Cell interface fluxes. Dimensions: (nx+1,nz+1,NUM_VARS) -double *tend; //Fluid state tendencies. Dimensions: (nx,nz,NUM_VARS) -double *sendbuf_l; //Buffer to send data to the left MPI rank -double *sendbuf_r; //Buffer to send data to the right MPI rank -double *recvbuf_l; //Buffer to receive data from the left MPI rank -double *recvbuf_r; //Buffer to receive data from the right MPI rank -int num_out = 0; //The number of outputs performed so far -int direction_switch = 1; -double mass0, te0; //Initial domain totals for mass and total energy -double mass , te ; //Domain totals for mass and total energy - -//How is this not in the standard?! -double dmin( double a , double b ) { if (a= 0) output(state,etime); + // Output the initial state + if (output_freq >= 0) + output(state, etime); //////////////////////////////////////////////////// // MAIN TIME STEP LOOP //////////////////////////////////////////////////// auto t1 = std::chrono::steady_clock::now(); while (etime < sim_time) { - //If the time step leads to exceeding the simulation time, shorten it for the last step - if (etime + dt > sim_time) { dt = sim_time - etime; } - //Perform a single time step - perform_timestep(state,state_tmp,flux,tend,dt); - //Inform the user + // If the time step leads to exceeding the simulation time, shorten it for + // the last step + if (etime + dt > sim_time) { + dt = sim_time - etime; + } + // Perform a single time step + perform_timestep(state, state_tmp, flux, tend, dt); + // Inform the user #ifndef NO_INFORM - if (mainproc) { printf( "Elapsed Time: %lf / %lf\n", etime , sim_time ); } + if (mainproc) { + printf("Elapsed Time: %lf / %lf\n", etime, sim_time); + } #endif - //Update the elapsed time and output counter + // Update the elapsed time and output counter etime = etime + dt; output_counter = output_counter + dt; - //If it's time for output, reset the counter, and do output + // If it's time for output, reset the counter, and do output if (output_freq >= 0 && output_counter >= output_freq) { output_counter = output_counter - output_freq; - output(state,etime); + output(state, etime); } } auto t2 = std::chrono::steady_clock::now(); if (mainproc) { - std::cout << "CPU Time: " << std::chrono::duration(t2-t1).count() << " sec\n"; + std::cout << "CPU Time: " << std::chrono::duration(t2 - t1).count() + << " sec\n"; } - //Final reductions for mass, kinetic energy, and total energy - reductions(mass,te); + // Final reductions for mass, kinetic energy, and total energy + reductions(mass, te); if (mainproc) { - printf( "d_mass: %le\n" , (mass - mass0)/mass0 ); - printf( "d_te: %le\n" , (te - te0 )/te0 ); + printf("d_mass: %le\n", (mass - mass0) / mass0); + printf("d_te: %le\n", (te - te0) / te0); } finalize(); } - -//Performs a single dimensionally split time step using a simple low-storage three-stage Runge-Kutta time integrator -//The dimensional 
splitting is a second-order-accurate alternating Strang splitting in which the -//order of directions is alternated each time step. -//The Runge-Kutta method used here is defined as follows: -// q* = q[n] + dt/3 * rhs(q[n]) -// q** = q[n] + dt/2 * rhs(q* ) -// q[n+1] = q[n] + dt/1 * rhs(q** ) -void perform_timestep( double *state , double *state_tmp , double *flux , double *tend , double dt ) { +// Performs a single dimensionally split time step using a simple low-storage +// three-stage Runge-Kutta time integrator The dimensional splitting is a +// second-order-accurate alternating Strang splitting in which the order of +// directions is alternated each time step. The Runge-Kutta method used here is +// defined as follows: +// q* = q[n] + dt/3 * rhs(q[n]) +// q** = q[n] + dt/2 * rhs(q* ) +// q[n+1] = q[n] + dt/1 * rhs(q** ) +void perform_timestep(double *state, double *state_tmp, double *flux, + double *tend, double dt) { + if (direction_switch) { + // x-direction first + semi_discrete_step(state, state, state_tmp, dt / 3, DIR_X, flux, tend); + semi_discrete_step(state, state_tmp, state_tmp, dt / 2, DIR_X, flux, tend); + semi_discrete_step(state, state_tmp, state, dt / 1, DIR_X, flux, tend); + // z-direction second + semi_discrete_step(state, state, state_tmp, dt / 3, DIR_Z, flux, tend); + semi_discrete_step(state, state_tmp, state_tmp, dt / 2, DIR_Z, flux, tend); + semi_discrete_step(state, state_tmp, state, dt / 1, DIR_Z, flux, tend); + } else { + // z-direction second + semi_discrete_step(state, state, state_tmp, dt / 3, DIR_Z, flux, tend); + semi_discrete_step(state, state_tmp, state_tmp, dt / 2, DIR_Z, flux, tend); + semi_discrete_step(state, state_tmp, state, dt / 1, DIR_Z, flux, tend); + // x-direction first + semi_discrete_step(state, state, state_tmp, dt / 3, DIR_X, flux, tend); + semi_discrete_step(state, state_tmp, state_tmp, dt / 2, DIR_X, flux, tend); + semi_discrete_step(state, state_tmp, state, dt / 1, DIR_X, flux, tend); + } if (direction_switch) { - //x-direction first - semi_discrete_step( state , state , state_tmp , dt / 3 , DIR_X , flux , tend ); - semi_discrete_step( state , state_tmp , state_tmp , dt / 2 , DIR_X , flux , tend ); - semi_discrete_step( state , state_tmp , state , dt / 1 , DIR_X , flux , tend ); - //z-direction second - semi_discrete_step( state , state , state_tmp , dt / 3 , DIR_Z , flux , tend ); - semi_discrete_step( state , state_tmp , state_tmp , dt / 2 , DIR_Z , flux , tend ); - semi_discrete_step( state , state_tmp , state , dt / 1 , DIR_Z , flux , tend ); + direction_switch = 0; } else { - //z-direction second - semi_discrete_step( state , state , state_tmp , dt / 3 , DIR_Z , flux , tend ); - semi_discrete_step( state , state_tmp , state_tmp , dt / 2 , DIR_Z , flux , tend ); - semi_discrete_step( state , state_tmp , state , dt / 1 , DIR_Z , flux , tend ); - //x-direction first - semi_discrete_step( state , state , state_tmp , dt / 3 , DIR_X , flux , tend ); - semi_discrete_step( state , state_tmp , state_tmp , dt / 2 , DIR_X , flux , tend ); - semi_discrete_step( state , state_tmp , state , dt / 1 , DIR_X , flux , tend ); + direction_switch = 1; } - if (direction_switch) { direction_switch = 0; } else { direction_switch = 1; } } - -//Perform a single semi-discretized step in time with the form: -//state_out = state_init + dt * rhs(state_forcing) -//Meaning the step starts from state_init, computes the rhs using state_forcing, and stores the result in state_out -void semi_discrete_step( double *state_init , double *state_forcing , double 
*state_out , double dt , int dir , double *flux , double *tend ) { +// Perform a single semi-discretized step in time with the form: +// state_out = state_init + dt * rhs(state_forcing) +// Meaning the step starts from state_init, computes the rhs using +// state_forcing, and stores the result in state_out +void semi_discrete_step(double *state_init, double *state_forcing, + double *state_out, double dt, int dir, double *flux, + double *tend) { int i, k, ll, inds, indt, indw; double x, z, wpert, dist, x0, z0, xrad, zrad, amp; - if (dir == DIR_X) { - //Set the halo values for this MPI task's fluid state in the x-direction + if (dir == DIR_X) { + // Set the halo values for this MPI task's fluid state in the x-direction set_halo_values_x(state_forcing); - //Compute the time tendencies for the fluid state in the x-direction - compute_tendencies_x(state_forcing,flux,tend,dt); + // Compute the time tendencies for the fluid state in the x-direction + compute_tendencies_x(state_forcing, flux, tend, dt); } else if (dir == DIR_Z) { - //Set the halo values for this MPI task's fluid state in the z-direction + // Set the halo values for this MPI task's fluid state in the z-direction set_halo_values_z(state_forcing); - //Compute the time tendencies for the fluid state in the z-direction - compute_tendencies_z(state_forcing,flux,tend,dt); + // Compute the time tendencies for the fluid state in the z-direction + compute_tendencies_z(state_forcing, flux, tend, dt); } - //Apply the tendencies to the fluid state -#pragma omp parallel for private(inds,indt,x,z,x0,z0,xrad,zrad,amp,dist,wpert,indw) collapse(3) - for (ll=0; ll , Oak Ridge National Laboratory -// This code simulates dry, stratified, compressible, non-hydrostatic fluid flows -// For documentation, please see the attached documentation in the "documentation" folder +// This code simulates dry, stratified, compressible, non-hydrostatic fluid +// flows For documentation, please see the attached documentation in the +// "documentation" folder // ////////////////////////////////////////////////////////////////////////////////////////// -#include -#include -#include +#include "pnetcdf.h" +#include #include #include +#include #include -#include "pnetcdf.h" -#include +#include +#include -constexpr double pi = 3.14159265358979323846264338327; //Pi -constexpr double grav = 9.8; //Gravitational acceleration (m / s^2) -constexpr double cp = 1004.; //Specific heat of dry air at constant pressure -constexpr double cv = 717.; //Specific heat of dry air at constant volume -constexpr double rd = 287.; //Dry air constant for equation of state (P=rho*rd*T) -constexpr double p0 = 1.e5; //Standard pressure at the surface in Pascals -constexpr double C0 = 27.5629410929725921310572974482; //Constant to translate potential temperature into pressure (P=C0*(rho*theta)**gamma) -constexpr double gamm = 1.40027894002789400278940027894; //gamma=cp/Rd , have to call this gamm because "gamma" is taken (I hate C so much) - -//Define domain and stability-related constants -constexpr double xlen = 2.e4; //Length of the domain in the x-direction (meters) -constexpr double zlen = 1.e4; //Length of the domain in the z-direction (meters) -constexpr double hv_beta = 0.05; //How strong to diffuse the solution: hv_beta \in [0:1] -constexpr double cfl = 1.50; //"Courant, Friedrichs, Lewy" number (for numerical stability) -constexpr double max_speed = 450; //Assumed maximum wave speed during the simulation (speed of sound + speed of wind) (meter / sec) -constexpr int hs = 2; //"Halo" size: 
number of cells beyond the MPI tasks's domain needed for a full "stencil" of information for reconstruction -constexpr int sten_size = 4; //Size of the stencil used for interpolation - -//Parameters for indexing and flags -constexpr int NUM_VARS = 4; //Number of fluid state variables -constexpr int ID_DENS = 0; //index for density ("rho") -constexpr int ID_UMOM = 1; //index for momentum in the x-direction ("rho * u") -constexpr int ID_WMOM = 2; //index for momentum in the z-direction ("rho * w") -constexpr int ID_RHOT = 3; //index for density * potential temperature ("rho * theta") -constexpr int DIR_X = 1; //Integer constant to express that this operation is in the x-direction -constexpr int DIR_Z = 2; //Integer constant to express that this operation is in the z-direction -constexpr int DATA_SPEC_COLLISION = 1; -constexpr int DATA_SPEC_THERMAL = 2; -constexpr int DATA_SPEC_GRAVITY_WAVES = 3; +constexpr double pi = 3.14159265358979323846264338327; // Pi +constexpr double grav = 9.8; // Gravitational acceleration (m / s^2) +constexpr double cp = 1004.; // Specific heat of dry air at constant pressure +constexpr double cv = 717.; // Specific heat of dry air at constant volume +constexpr double rd = + 287.; // Dry air constant for equation of state (P=rho*rd*T) +constexpr double p0 = 1.e5; // Standard pressure at the surface in Pascals +constexpr double C0 = + 27.5629410929725921310572974482; // Constant to translate potential + // temperature into pressure + // (P=C0*(rho*theta)**gamma) +constexpr double gamm = + 1.40027894002789400278940027894; // gamma=cp/Rd , have to call this gamm + // because "gamma" is taken (I hate C so + // much) + +// Define domain and stability-related constants +constexpr double xlen = 2.e4; // Length of the domain in the x-direction + // (meters) +constexpr double zlen = 1.e4; // Length of the domain in the z-direction + // (meters) +constexpr double hv_beta = + 0.05; // How strong to diffuse the solution: hv_beta \in [0:1] +constexpr double cfl = + 1.50; //"Courant, Friedrichs, Lewy" number (for numerical stability) +constexpr double max_speed = + 450; // Assumed maximum wave speed during the simulation (speed of sound + + // speed of wind) (meter / sec) +constexpr int hs = + 2; //"Halo" size: number of cells beyond the MPI tasks's domain needed for a + // full "stencil" of information for reconstruction +constexpr int sten_size = 4; // Size of the stencil used for interpolation + +// Parameters for indexing and flags +constexpr int NUM_VARS = 4; // Number of fluid state variables +constexpr int ID_DENS = 0; // index for density ("rho") +constexpr int ID_UMOM = 1; // index for momentum in the x-direction ("rho * u") +constexpr int ID_WMOM = 2; // index for momentum in the z-direction ("rho * w") +constexpr int ID_RHOT = + 3; // index for density * potential temperature ("rho * theta") +constexpr int DIR_X = + 1; // Integer constant to express that this operation is in the x-direction +constexpr int DIR_Z = + 2; // Integer constant to express that this operation is in the z-direction +constexpr int DATA_SPEC_COLLISION = 1; +constexpr int DATA_SPEC_THERMAL = 2; +constexpr int DATA_SPEC_GRAVITY_WAVES = 3; constexpr int DATA_SPEC_DENSITY_CURRENT = 5; -constexpr int DATA_SPEC_INJECTION = 6; +constexpr int DATA_SPEC_INJECTION = 6; constexpr int nqpoints = 3; -constexpr double qpoints [] = { 0.112701665379258311482073460022E0 , 0.500000000000000000000000000000E0 , 0.887298334620741688517926539980E0 }; -constexpr double qweights[] = { 0.277777777777777777777777777779E0 
, 0.444444444444444444444444444444E0 , 0.277777777777777777777777777779E0 }; +constexpr double qpoints[] = {0.112701665379258311482073460022E0, + 0.500000000000000000000000000000E0, + 0.887298334620741688517926539980E0}; +constexpr double qweights[] = {0.277777777777777777777777777779E0, + 0.444444444444444444444444444444E0, + 0.277777777777777777777777777779E0}; int asyncid = 1; /////////////////////////////////////////////////////////////////////////////////////// // BEGIN USER-CONFIGURABLE PARAMETERS /////////////////////////////////////////////////////////////////////////////////////// -//The x-direction length is twice as long as the z-direction length -//So, you'll want to have nx_glob be twice as large as nz_glob -int constexpr nx_glob = _NX; //Number of total cells in the x-direction -int constexpr nz_glob = _NZ; //Number of total cells in the z-direction -double constexpr sim_time = _SIM_TIME; //How many seconds to run the simulation -double constexpr output_freq = _OUT_FREQ; //How frequently to output data to file (in seconds) -int constexpr data_spec_int = _DATA_SPEC; //How to initialize the data -double constexpr dx = xlen / nx_glob; // grid spacing in the x-direction -double constexpr dz = zlen / nz_glob; // grid spacing in the x-direction +// The x-direction length is twice as long as the z-direction length +// So, you'll want to have nx_glob be twice as large as nz_glob +int constexpr nx_glob = _NX; // Number of total cells in the x-direction +int constexpr nz_glob = _NZ; // Number of total cells in the z-direction +double constexpr sim_time = _SIM_TIME; // How many seconds to run the simulation +double constexpr output_freq = + _OUT_FREQ; // How frequently to output data to file (in seconds) +int constexpr data_spec_int = _DATA_SPEC; // How to initialize the data +double constexpr dx = xlen / nx_glob; // grid spacing in the x-direction +double constexpr dz = zlen / nz_glob; // grid spacing in the x-direction /////////////////////////////////////////////////////////////////////////////////////// // END USER-CONFIGURABLE PARAMETERS /////////////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////////////// -// Variables that are initialized but remain static over the course of the simulation +// Variables that are initialized but remain static over the course of the +// simulation /////////////////////////////////////////////////////////////////////////////////////// -double dt; //Model time step (seconds) -int nx, nz; //Number of local grid cells in the x- and z- dimensions for this MPI task -int i_beg, k_beg; //beginning index in the x- and z-directions for this MPI task -int nranks, myrank; //Number of MPI ranks and my rank id -int left_rank, right_rank; //MPI Rank IDs that exist to my left and right in the global domain -int mainproc; //Am I the main process (rank == 0)? -double *hy_dens_cell; //hydrostatic density (vert cell avgs). Dimensions: (1-hs:nz+hs) -double *hy_dens_theta_cell; //hydrostatic rho*t (vert cell avgs). Dimensions: (1-hs:nz+hs) -double *hy_dens_int; //hydrostatic density (vert cell interf). Dimensions: (1:nz+1) -double *hy_dens_theta_int; //hydrostatic rho*t (vert cell interf). Dimensions: (1:nz+1) -double *hy_pressure_int; //hydrostatic press (vert cell interf). 
Dimensions: (1:nz+1) +double dt; // Model time step (seconds) +int nx, nz; // Number of local grid cells in the x- and z- dimensions for this + // MPI task +int i_beg, k_beg; // beginning index in the x- and z-directions for this MPI + // task +int nranks, myrank; // Number of MPI ranks and my rank id +int left_rank, right_rank; // MPI Rank IDs that exist to my left and right in + // the global domain +int mainproc; // Am I the main process (rank == 0)? +double *hy_dens_cell; // hydrostatic density (vert cell avgs). Dimensions: + // (1-hs:nz+hs) +double *hy_dens_theta_cell; // hydrostatic rho*t (vert cell avgs). Dimensions: + // (1-hs:nz+hs) +double * + hy_dens_int; // hydrostatic density (vert cell interf). Dimensions: (1:nz+1) +double *hy_dens_theta_int; // hydrostatic rho*t (vert cell interf). Dimensions: + // (1:nz+1) +double *hy_pressure_int; // hydrostatic press (vert cell interf). Dimensions: + // (1:nz+1) /////////////////////////////////////////////////////////////////////////////////////// // Variables that are dynamics over the course of the simulation /////////////////////////////////////////////////////////////////////////////////////// -double etime; //Elapsed model time -double output_counter; //Helps determine when it's time to do output -//Runtime variable arrays -double *state; //Fluid state. Dimensions: (1-hs:nx+hs,1-hs:nz+hs,NUM_VARS) -double *state_tmp; //Fluid state. Dimensions: (1-hs:nx+hs,1-hs:nz+hs,NUM_VARS) -double *flux; //Cell interface fluxes. Dimensions: (nx+1,nz+1,NUM_VARS) -double *tend; //Fluid state tendencies. Dimensions: (nx,nz,NUM_VARS) -double *sendbuf_l; //Buffer to send data to the left MPI rank -double *sendbuf_r; //Buffer to send data to the right MPI rank -double *recvbuf_l; //Buffer to receive data from the left MPI rank -double *recvbuf_r; //Buffer to receive data from the right MPI rank -int num_out = 0; //The number of outputs performed so far -int direction_switch = 1; -double mass0, te0; //Initial domain totals for mass and total energy -double mass , te ; //Domain totals for mass and total energy - -//How is this not in the standard?! 
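[Editor's note] The "Dimensions:" comments above describe Fortran-style shapes stored in flat C arrays. For orientation, the index arithmetic used later in this patch for a (variable ll, level k, column i) triple is sketched below; the helper name is illustrative only and relies on the file's own globals nx, nz, and hs:

// Illustrative sketch: flat index into state/state_tmp, matching the
// Dimensions comments above. The state arrays are padded by hs ghost
// cells in both spatial directions; tend is unpadded (ll*nz*nx + k*nx + i).
inline int flat_state_index(int ll, int k, int i) {
  return ll * (nz + 2 * hs) * (nx + 2 * hs) + (k + hs) * (nx + 2 * hs) +
         (i + hs);
}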
-double dmin( double a , double b ) { if (a= 0) output(state,etime); - - //////////////////////////////////////////////////// - // MAIN TIME STEP LOOP - //////////////////////////////////////////////////// + init(&argc, &argv); + +#pragma omp target data map( \ + to : state_tmp[ : (nz + 2 * hs) * (nx + 2 * hs) * NUM_VARS], \ + hy_dens_cell[ : nz + 2 * hs], hy_dens_theta_cell[ : nz + 2 * hs], \ + hy_dens_int[ : nz + 1], hy_dens_theta_int[ : nz + 1], \ + hy_pressure_int[ : nz + 1]) \ + map(alloc : flux[ : (nz + 1) * (nx + 1) * NUM_VARS], \ + tend[ : nz * nx * NUM_VARS], sendbuf_l[ : hs * nz * NUM_VARS], \ + sendbuf_r[ : hs * nz * NUM_VARS], \ + recvbuf_l[ : hs * nz * NUM_VARS], \ + recvbuf_r[ : hs * nz * NUM_VARS]) \ + map(tofrom : state[ : (nz + 2 * hs) * (nx + 2 * hs) * NUM_VARS]) + { + + // Initial reductions for mass, kinetic energy, and total energy + reductions(mass0, te0); + + // Output the initial state + if (output_freq >= 0) + output(state, etime); + + //////////////////////////////////////////////////// + // MAIN TIME STEP LOOP + //////////////////////////////////////////////////// #pragma omp taskwait - auto t1 = std::chrono::steady_clock::now(); - while (etime < sim_time) { - //If the time step leads to exceeding the simulation time, shorten it for the last step - if (etime + dt > sim_time) { dt = sim_time - etime; } - //Perform a single time step - perform_timestep(state,state_tmp,flux,tend,dt); - //Inform the user + auto t1 = std::chrono::steady_clock::now(); + while (etime < sim_time) { + // If the time step leads to exceeding the simulation time, shorten it for + // the last step + if (etime + dt > sim_time) { + dt = sim_time - etime; + } + // Perform a single time step + perform_timestep(state, state_tmp, flux, tend, dt); + // Inform the user #ifndef NO_INFORM - if (mainproc) { printf( "Elapsed Time: %lf / %lf\n", etime , sim_time ); } + if (mainproc) { + printf("Elapsed Time: %lf / %lf\n", etime, sim_time); + } #endif - //Update the elapsed time and output counter - etime = etime + dt; - output_counter = output_counter + dt; - //If it's time for output, reset the counter, and do output - if (output_freq >= 0 && output_counter >= output_freq) { - output_counter = output_counter - output_freq; - output(state,etime); + // Update the elapsed time and output counter + etime = etime + dt; + output_counter = output_counter + dt; + // If it's time for output, reset the counter, and do output + if (output_freq >= 0 && output_counter >= output_freq) { + output_counter = output_counter - output_freq; + output(state, etime); + } } - } #pragma omp taskwait - auto t2 = std::chrono::steady_clock::now(); - if (mainproc) { - std::cout << "CPU Time: " << std::chrono::duration(t2-t1).count() << " sec\n"; - } + auto t2 = std::chrono::steady_clock::now(); + if (mainproc) { + std::cout << "CPU Time: " + << std::chrono::duration(t2 - t1).count() << " sec\n"; + } - //Final reductions for mass, kinetic energy, and total energy - reductions(mass,te); -} + // Final reductions for mass, kinetic energy, and total energy + reductions(mass, te); + } if (mainproc) { - printf( "d_mass: %le\n" , (mass - mass0)/mass0 ); - printf( "d_te: %le\n" , (te - te0 )/te0 ); + printf("d_mass: %le\n", (mass - mass0) / mass0); + printf("d_te: %le\n", (te - te0) / te0); } finalize(); } - -//Performs a single dimensionally split time step using a simple low-storage three-stage Runge-Kutta time integrator -//The dimensional splitting is a second-order-accurate alternating Strang splitting in which the -//order of 
directions is alternated each time step. -//The Runge-Kutta method used here is defined as follows: -// q* = q[n] + dt/3 * rhs(q[n]) -// q** = q[n] + dt/2 * rhs(q* ) -// q[n+1] = q[n] + dt/1 * rhs(q** ) -void perform_timestep( double *state , double *state_tmp , double *flux , double *tend , double dt ) { +// Performs a single dimensionally split time step using a simple low-storage +// three-stage Runge-Kutta time integrator The dimensional splitting is a +// second-order-accurate alternating Strang splitting in which the order of +// directions is alternated each time step. The Runge-Kutta method used here is +// defined as follows: +// q* = q[n] + dt/3 * rhs(q[n]) +// q** = q[n] + dt/2 * rhs(q* ) +// q[n+1] = q[n] + dt/1 * rhs(q** ) +void perform_timestep(double *state, double *state_tmp, double *flux, + double *tend, double dt) { + if (direction_switch) { + // x-direction first + semi_discrete_step(state, state, state_tmp, dt / 3, DIR_X, flux, tend); + semi_discrete_step(state, state_tmp, state_tmp, dt / 2, DIR_X, flux, tend); + semi_discrete_step(state, state_tmp, state, dt / 1, DIR_X, flux, tend); + // z-direction second + semi_discrete_step(state, state, state_tmp, dt / 3, DIR_Z, flux, tend); + semi_discrete_step(state, state_tmp, state_tmp, dt / 2, DIR_Z, flux, tend); + semi_discrete_step(state, state_tmp, state, dt / 1, DIR_Z, flux, tend); + } else { + // z-direction second + semi_discrete_step(state, state, state_tmp, dt / 3, DIR_Z, flux, tend); + semi_discrete_step(state, state_tmp, state_tmp, dt / 2, DIR_Z, flux, tend); + semi_discrete_step(state, state_tmp, state, dt / 1, DIR_Z, flux, tend); + // x-direction first + semi_discrete_step(state, state, state_tmp, dt / 3, DIR_X, flux, tend); + semi_discrete_step(state, state_tmp, state_tmp, dt / 2, DIR_X, flux, tend); + semi_discrete_step(state, state_tmp, state, dt / 1, DIR_X, flux, tend); + } if (direction_switch) { - //x-direction first - semi_discrete_step( state , state , state_tmp , dt / 3 , DIR_X , flux , tend ); - semi_discrete_step( state , state_tmp , state_tmp , dt / 2 , DIR_X , flux , tend ); - semi_discrete_step( state , state_tmp , state , dt / 1 , DIR_X , flux , tend ); - //z-direction second - semi_discrete_step( state , state , state_tmp , dt / 3 , DIR_Z , flux , tend ); - semi_discrete_step( state , state_tmp , state_tmp , dt / 2 , DIR_Z , flux , tend ); - semi_discrete_step( state , state_tmp , state , dt / 1 , DIR_Z , flux , tend ); + direction_switch = 0; } else { - //z-direction second - semi_discrete_step( state , state , state_tmp , dt / 3 , DIR_Z , flux , tend ); - semi_discrete_step( state , state_tmp , state_tmp , dt / 2 , DIR_Z , flux , tend ); - semi_discrete_step( state , state_tmp , state , dt / 1 , DIR_Z , flux , tend ); - //x-direction first - semi_discrete_step( state , state , state_tmp , dt / 3 , DIR_X , flux , tend ); - semi_discrete_step( state , state_tmp , state_tmp , dt / 2 , DIR_X , flux , tend ); - semi_discrete_step( state , state_tmp , state , dt / 1 , DIR_X , flux , tend ); + direction_switch = 1; } - if (direction_switch) { direction_switch = 0; } else { direction_switch = 1; } } - -//Perform a single semi-discretized step in time with the form: -//state_out = state_init + dt * rhs(state_forcing) -//Meaning the step starts from state_init, computes the rhs using state_forcing, and stores the result in state_out -void semi_discrete_step( double *state_init , double *state_forcing , double *state_out , double dt , int dir , double *flux , double *tend ) { - if (dir == DIR_X) { - //Set 
the halo values for this MPI task's fluid state in the x-direction +// Perform a single semi-discretized step in time with the form: +// state_out = state_init + dt * rhs(state_forcing) +// Meaning the step starts from state_init, computes the rhs using +// state_forcing, and stores the result in state_out +void semi_discrete_step(double *state_init, double *state_forcing, + double *state_out, double dt, int dir, double *flux, + double *tend) { + if (dir == DIR_X) { + // Set the halo values for this MPI task's fluid state in the x-direction set_halo_values_x(state_forcing); - //Compute the time tendencies for the fluid state in the x-direction - compute_tendencies_x(state_forcing,flux,tend,dt); + // Compute the time tendencies for the fluid state in the x-direction + compute_tendencies_x(state_forcing, flux, tend, dt); } else if (dir == DIR_Z) { - //Set the halo values for this MPI task's fluid state in the z-direction + // Set the halo values for this MPI task's fluid state in the z-direction set_halo_values_z(state_forcing); - //Compute the time tendencies for the fluid state in the z-direction - compute_tendencies_z(state_forcing,flux,tend,dt); + // Compute the time tendencies for the fluid state in the z-direction + compute_tendencies_z(state_forcing, flux, tend, dt); } - //Apply the tendencies to the fluid state -#pragma omp target teams distribute parallel for simd collapse(3) depend(inout:asyncid) nowait - for (int ll=0; ll , Oak Ridge National Laboratory -// This code simulates dry, stratified, compressible, non-hydrostatic fluid flows -// For documentation, please see the attached documentation in the "documentation" folder +// This code simulates dry, stratified, compressible, non-hydrostatic fluid +// flows For documentation, please see the attached documentation in the +// "documentation" folder // ////////////////////////////////////////////////////////////////////////////////////////// -#include -#include -#include +#include "pnetcdf.h" +#include #include #include +#include #include -#include "pnetcdf.h" -#include +#include +#include -constexpr double pi = 3.14159265358979323846264338327; //Pi -constexpr double grav = 9.8; //Gravitational acceleration (m / s^2) -constexpr double cp = 1004.; //Specific heat of dry air at constant pressure -constexpr double cv = 717.; //Specific heat of dry air at constant volume -constexpr double rd = 287.; //Dry air constant for equation of state (P=rho*rd*T) -constexpr double p0 = 1.e5; //Standard pressure at the surface in Pascals -constexpr double C0 = 27.5629410929725921310572974482; //Constant to translate potential temperature into pressure (P=C0*(rho*theta)**gamma) -constexpr double gamm = 1.40027894002789400278940027894; //gamma=cp/Rd , have to call this gamm because "gamma" is taken (I hate C so much) -//Define domain and stability-related constants -constexpr double xlen = 2.e4; //Length of the domain in the x-direction (meters) -constexpr double zlen = 1.e4; //Length of the domain in the z-direction (meters) -constexpr double hv_beta = 0.05; //How strong to diffuse the solution: hv_beta \in [0:1] -constexpr double cfl = 1.50; //"Courant, Friedrichs, Lewy" number (for numerical stability) -constexpr double max_speed = 450; //Assumed maximum wave speed during the simulation (speed of sound + speed of wind) (meter / sec) -constexpr int hs = 2; //"Halo" size: number of cells beyond the MPI tasks's domain needed for a full "stencil" of information for reconstruction -constexpr int sten_size = 4; //Size of the stencil used for 
interpolation - -//Parameters for indexing and flags -constexpr int NUM_VARS = 4; //Number of fluid state variables -constexpr int ID_DENS = 0; //index for density ("rho") -constexpr int ID_UMOM = 1; //index for momentum in the x-direction ("rho * u") -constexpr int ID_WMOM = 2; //index for momentum in the z-direction ("rho * w") -constexpr int ID_RHOT = 3; //index for density * potential temperature ("rho * theta") -constexpr int DIR_X = 1; //Integer constant to express that this operation is in the x-direction -constexpr int DIR_Z = 2; //Integer constant to express that this operation is in the z-direction -constexpr int DATA_SPEC_COLLISION = 1; -constexpr int DATA_SPEC_THERMAL = 2; -constexpr int DATA_SPEC_GRAVITY_WAVES = 3; +constexpr double pi = 3.14159265358979323846264338327; // Pi +constexpr double grav = 9.8; // Gravitational acceleration (m / s^2) +constexpr double cp = 1004.; // Specific heat of dry air at constant pressure +constexpr double cv = 717.; // Specific heat of dry air at constant volume +constexpr double rd = + 287.; // Dry air constant for equation of state (P=rho*rd*T) +constexpr double p0 = 1.e5; // Standard pressure at the surface in Pascals +constexpr double C0 = + 27.5629410929725921310572974482; // Constant to translate potential + // temperature into pressure + // (P=C0*(rho*theta)**gamma) +constexpr double gamm = + 1.40027894002789400278940027894; // gamma=cp/Rd , have to call this gamm + // because "gamma" is taken (I hate C so + // much) +// Define domain and stability-related constants +constexpr double xlen = 2.e4; // Length of the domain in the x-direction + // (meters) +constexpr double zlen = 1.e4; // Length of the domain in the z-direction + // (meters) +constexpr double hv_beta = + 0.05; // How strong to diffuse the solution: hv_beta \in [0:1] +constexpr double cfl = + 1.50; //"Courant, Friedrichs, Lewy" number (for numerical stability) +constexpr double max_speed = + 450; // Assumed maximum wave speed during the simulation (speed of sound + + // speed of wind) (meter / sec) +constexpr int hs = + 2; //"Halo" size: number of cells beyond the MPI tasks's domain needed for a + // full "stencil" of information for reconstruction +constexpr int sten_size = 4; // Size of the stencil used for interpolation + +// Parameters for indexing and flags +constexpr int NUM_VARS = 4; // Number of fluid state variables +constexpr int ID_DENS = 0; // index for density ("rho") +constexpr int ID_UMOM = 1; // index for momentum in the x-direction ("rho * u") +constexpr int ID_WMOM = 2; // index for momentum in the z-direction ("rho * w") +constexpr int ID_RHOT = + 3; // index for density * potential temperature ("rho * theta") +constexpr int DIR_X = + 1; // Integer constant to express that this operation is in the x-direction +constexpr int DIR_Z = + 2; // Integer constant to express that this operation is in the z-direction +constexpr int DATA_SPEC_COLLISION = 1; +constexpr int DATA_SPEC_THERMAL = 2; +constexpr int DATA_SPEC_GRAVITY_WAVES = 3; constexpr int DATA_SPEC_DENSITY_CURRENT = 5; -constexpr int DATA_SPEC_INJECTION = 6; +constexpr int DATA_SPEC_INJECTION = 6; constexpr int nqpoints = 3; -constexpr double qpoints [] = { 0.112701665379258311482073460022E0 , 0.500000000000000000000000000000E0 , 0.887298334620741688517926539980E0 }; -constexpr double qweights[] = { 0.277777777777777777777777777779E0 , 0.444444444444444444444444444444E0 , 0.277777777777777777777777777779E0 }; +constexpr double qpoints[] = {0.112701665379258311482073460022E0, + 
0.500000000000000000000000000000E0, + 0.887298334620741688517926539980E0}; +constexpr double qweights[] = {0.277777777777777777777777777779E0, + 0.444444444444444444444444444444E0, + 0.277777777777777777777777777779E0}; /////////////////////////////////////////////////////////////////////////////////////// // BEGIN USER-CONFIGURABLE PARAMETERS /////////////////////////////////////////////////////////////////////////////////////// -//The x-direction length is twice as long as the z-direction length -//So, you'll want to have nx_glob be twice as large as nz_glob -int constexpr nx_glob = _NX; //Number of total cells in the x-direction -int constexpr nz_glob = _NZ; //Number of total cells in the z-direction -double constexpr sim_time = _SIM_TIME; //How many seconds to run the simulation -double constexpr output_freq = _OUT_FREQ; //How frequently to output data to file (in seconds) -int constexpr data_spec_int = _DATA_SPEC; //How to initialize the data -double constexpr dx = xlen / nx_glob; // grid spacing in the x-direction -double constexpr dz = zlen / nz_glob; // grid spacing in the x-direction +// The x-direction length is twice as long as the z-direction length +// So, you'll want to have nx_glob be twice as large as nz_glob +int constexpr nx_glob = _NX; // Number of total cells in the x-direction +int constexpr nz_glob = _NZ; // Number of total cells in the z-direction +double constexpr sim_time = _SIM_TIME; // How many seconds to run the simulation +double constexpr output_freq = + _OUT_FREQ; // How frequently to output data to file (in seconds) +int constexpr data_spec_int = _DATA_SPEC; // How to initialize the data +double constexpr dx = xlen / nx_glob; // grid spacing in the x-direction +double constexpr dz = zlen / nz_glob; // grid spacing in the x-direction /////////////////////////////////////////////////////////////////////////////////////// // END USER-CONFIGURABLE PARAMETERS /////////////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////////////// -// Variables that are initialized but remain static over the course of the simulation +// Variables that are initialized but remain static over the course of the +// simulation /////////////////////////////////////////////////////////////////////////////////////// -double dt; //Model time step (seconds) -int nx, nz; //Number of local grid cells in the x- and z- dimensions for this MPI task -int i_beg, k_beg; //beginning index in the x- and z-directions for this MPI task -int nranks, myrank; //Number of MPI ranks and my rank id -int left_rank, right_rank; //MPI Rank IDs that exist to my left and right in the global domain -int mainproc; //Am I the main process (rank == 0)? -double *hy_dens_cell; //hydrostatic density (vert cell avgs). Dimensions: (1-hs:nz+hs) -double *hy_dens_theta_cell; //hydrostatic rho*t (vert cell avgs). Dimensions: (1-hs:nz+hs) -double *hy_dens_int; //hydrostatic density (vert cell interf). Dimensions: (1:nz+1) -double *hy_dens_theta_int; //hydrostatic rho*t (vert cell interf). Dimensions: (1:nz+1) -double *hy_pressure_int; //hydrostatic press (vert cell interf). 
Dimensions: (1:nz+1) +double dt; // Model time step (seconds) +int nx, nz; // Number of local grid cells in the x- and z- dimensions for this + // MPI task +int i_beg, k_beg; // beginning index in the x- and z-directions for this MPI + // task +int nranks, myrank; // Number of MPI ranks and my rank id +int left_rank, right_rank; // MPI Rank IDs that exist to my left and right in + // the global domain +int mainproc; // Am I the main process (rank == 0)? +double *hy_dens_cell; // hydrostatic density (vert cell avgs). Dimensions: + // (1-hs:nz+hs) +double *hy_dens_theta_cell; // hydrostatic rho*t (vert cell avgs). Dimensions: + // (1-hs:nz+hs) +double * + hy_dens_int; // hydrostatic density (vert cell interf). Dimensions: (1:nz+1) +double *hy_dens_theta_int; // hydrostatic rho*t (vert cell interf). Dimensions: + // (1:nz+1) +double *hy_pressure_int; // hydrostatic press (vert cell interf). Dimensions: + // (1:nz+1) /////////////////////////////////////////////////////////////////////////////////////// // Variables that are dynamics over the course of the simulation /////////////////////////////////////////////////////////////////////////////////////// -double etime; //Elapsed model time -double output_counter; //Helps determine when it's time to do output -//Runtime variable arrays -double *state; //Fluid state. Dimensions: (1-hs:nx+hs,1-hs:nz+hs,NUM_VARS) -double *state_tmp; //Fluid state. Dimensions: (1-hs:nx+hs,1-hs:nz+hs,NUM_VARS) -double *flux; //Cell interface fluxes. Dimensions: (nx+1,nz+1,NUM_VARS) -double *tend; //Fluid state tendencies. Dimensions: (nx,nz,NUM_VARS) -int num_out = 0; //The number of outputs performed so far -int direction_switch = 1; -double mass0, te0; //Initial domain totals for mass and total energy -double mass , te ; //Domain totals for mass and total energy - -//How is this not in the standard?! 
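[Editor's note] init() (outside this hunk) sets dt from the constants above; as a hedged sketch, the usual CFL-style estimate it is presumably based on limits the step by the smaller grid spacing and the assumed maximum wave speed:

// Sketch only (assumption, not part of the patch): stable explicit time step.
double dt_estimate = cfl * (dx < dz ? dx : dz) / max_speed; // seconds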
-double dmin( double a , double b ) { if (a sim_time) { dt = sim_time - etime; } - //Perform a single time step - perform_timestep(state,state_tmp,flux,tend,dt); - //Inform the user + // If the time step leads to exceeding the simulation time, shorten it for + // the last step + if (etime + dt > sim_time) { + dt = sim_time - etime; + } + // Perform a single time step + perform_timestep(state, state_tmp, flux, tend, dt); + // Inform the user #ifndef NO_INFORM - if (mainproc) { printf( "Elapsed Time: %lf / %lf\n", etime , sim_time ); } + if (mainproc) { + printf("Elapsed Time: %lf / %lf\n", etime, sim_time); + } #endif - //Update the elapsed time and output counter + // Update the elapsed time and output counter etime = etime + dt; output_counter = output_counter + dt; - //If it's time for output, reset the counter, and do output + // If it's time for output, reset the counter, and do output if (output_counter >= output_freq) { output_counter = output_counter - output_freq; - output(state,etime); + output(state, etime); } } auto t2 = std::chrono::steady_clock::now(); if (mainproc) { - std::cout << "CPU Time: " << std::chrono::duration(t2-t1).count() << " sec\n"; + std::cout << "CPU Time: " << std::chrono::duration(t2 - t1).count() + << " sec\n"; } - //Final reductions for mass, kinetic energy, and total energy - reductions(mass,te); + // Final reductions for mass, kinetic energy, and total energy + reductions(mass, te); if (mainproc) { - printf( "d_mass: %le\n" , (mass - mass0)/mass0 ); - printf( "d_te: %le\n" , (te - te0 )/te0 ); + printf("d_mass: %le\n", (mass - mass0) / mass0); + printf("d_te: %le\n", (te - te0) / te0); } finalize(); } -//Performs a single dimensionally split time step using a simple low-storage three-stage Runge-Kutta time integrator -//The dimensional splitting is a second-order-accurate alternating Strang splitting in which the -//order of directions is alternated each time step. -//The Runge-Kutta method used here is defined as follows: -// q* = q[n] + dt/3 * rhs(q[n]) -// q** = q[n] + dt/2 * rhs(q* ) -// q[n+1] = q[n] + dt/1 * rhs(q** ) -void perform_timestep( double *state , double *state_tmp , double *flux , double *tend , double dt ) { +// Performs a single dimensionally split time step using a simple low-storage +// three-stage Runge-Kutta time integrator The dimensional splitting is a +// second-order-accurate alternating Strang splitting in which the order of +// directions is alternated each time step. 
The Runge-Kutta method used here is +// defined as follows: +// q* = q[n] + dt/3 * rhs(q[n]) +// q** = q[n] + dt/2 * rhs(q* ) +// q[n+1] = q[n] + dt/1 * rhs(q** ) +void perform_timestep(double *state, double *state_tmp, double *flux, + double *tend, double dt) { if (direction_switch) { - //x-direction first - semi_discrete_step( state , state , state_tmp , dt / 3 , DIR_X , flux , tend ); - semi_discrete_step( state , state_tmp , state_tmp , dt / 2 , DIR_X , flux , tend ); - semi_discrete_step( state , state_tmp , state , dt / 1 , DIR_X , flux , tend ); - //z-direction second - semi_discrete_step( state , state , state_tmp , dt / 3 , DIR_Z , flux , tend ); - semi_discrete_step( state , state_tmp , state_tmp , dt / 2 , DIR_Z , flux , tend ); - semi_discrete_step( state , state_tmp , state , dt / 1 , DIR_Z , flux , tend ); + // x-direction first + semi_discrete_step(state, state, state_tmp, dt / 3, DIR_X, flux, tend); + semi_discrete_step(state, state_tmp, state_tmp, dt / 2, DIR_X, flux, tend); + semi_discrete_step(state, state_tmp, state, dt / 1, DIR_X, flux, tend); + // z-direction second + semi_discrete_step(state, state, state_tmp, dt / 3, DIR_Z, flux, tend); + semi_discrete_step(state, state_tmp, state_tmp, dt / 2, DIR_Z, flux, tend); + semi_discrete_step(state, state_tmp, state, dt / 1, DIR_Z, flux, tend); } else { - //z-direction second - semi_discrete_step( state , state , state_tmp , dt / 3 , DIR_Z , flux , tend ); - semi_discrete_step( state , state_tmp , state_tmp , dt / 2 , DIR_Z , flux , tend ); - semi_discrete_step( state , state_tmp , state , dt / 1 , DIR_Z , flux , tend ); - //x-direction first - semi_discrete_step( state , state , state_tmp , dt / 3 , DIR_X , flux , tend ); - semi_discrete_step( state , state_tmp , state_tmp , dt / 2 , DIR_X , flux , tend ); - semi_discrete_step( state , state_tmp , state , dt / 1 , DIR_X , flux , tend ); + // z-direction second + semi_discrete_step(state, state, state_tmp, dt / 3, DIR_Z, flux, tend); + semi_discrete_step(state, state_tmp, state_tmp, dt / 2, DIR_Z, flux, tend); + semi_discrete_step(state, state_tmp, state, dt / 1, DIR_Z, flux, tend); + // x-direction first + semi_discrete_step(state, state, state_tmp, dt / 3, DIR_X, flux, tend); + semi_discrete_step(state, state_tmp, state_tmp, dt / 2, DIR_X, flux, tend); + semi_discrete_step(state, state_tmp, state, dt / 1, DIR_X, flux, tend); + } + if (direction_switch) { + direction_switch = 0; + } else { + direction_switch = 1; } - if (direction_switch) { direction_switch = 0; } else { direction_switch = 1; } } - -//Perform a single semi-discretized step in time with the form: -//state_out = state_init + dt * rhs(state_forcing) -//Meaning the step starts from state_init, computes the rhs using state_forcing, and stores the result in state_out -void semi_discrete_step( double *state_init , double *state_forcing , double *state_out , double dt , int dir , double *flux , double *tend ) { +// Perform a single semi-discretized step in time with the form: +// state_out = state_init + dt * rhs(state_forcing) +// Meaning the step starts from state_init, computes the rhs using +// state_forcing, and stores the result in state_out +void semi_discrete_step(double *state_init, double *state_forcing, + double *state_out, double dt, int dir, double *flux, + double *tend) { int i, k, ll, inds, indt, indw; double x, z, wpert, dist, x0, z0, xrad, zrad, amp; - if (dir == DIR_X) { - //Set the halo values for this MPI task's fluid state in the x-direction + if (dir == DIR_X) { + // Set the halo values for 
this MPI task's fluid state in the x-direction set_halo_values_x(state_forcing); - //Compute the time tendencies for the fluid state in the x-direction - compute_tendencies_x(state_forcing,flux,tend,dt); + // Compute the time tendencies for the fluid state in the x-direction + compute_tendencies_x(state_forcing, flux, tend, dt); } else if (dir == DIR_Z) { - //Set the halo values for this MPI task's fluid state in the z-direction + // Set the halo values for this MPI task's fluid state in the z-direction set_halo_values_z(state_forcing); - //Compute the time tendencies for the fluid state in the z-direction - compute_tendencies_z(state_forcing,flux,tend,dt); + // Compute the time tendencies for the fluid state in the z-direction + compute_tendencies_z(state_forcing, flux, tend, dt); } ///////////////////////////////////////////////// // TODO: THREAD ME ///////////////////////////////////////////////// - //Apply the tendencies to the fluid state - for (ll=0; ll inline T min( T val1 , T val2 ) { - return val1 < val2 ? val1 : val2 ; +template inline T min(T val1, T val2) { + return val1 < val2 ? val1 : val2; } -template inline T abs( T val ) { - return val > 0 ? val : -val; -} +template inline T abs(T val) { return val > 0 ? val : -val; } #ifdef SIMD_LEN - unsigned int constexpr simd_len = SIMD_LEN; +unsigned int constexpr simd_len = SIMD_LEN; #else - unsigned int constexpr simd_len = 4; +unsigned int constexpr simd_len = 4; #endif +using yakl::simd::iterate_over_pack; using yakl::simd::Pack; using yakl::simd::PackIterConfig; -using yakl::simd::iterate_over_pack; - diff --git a/cpp_yakl/experimental/miniWeather_mpi_parallelfor_simd_x.cpp b/cpp_yakl/experimental/miniWeather_mpi_parallelfor_simd_x.cpp index 7efca9d2..2f43f287 100644 --- a/cpp_yakl/experimental/miniWeather_mpi_parallelfor_simd_x.cpp +++ b/cpp_yakl/experimental/miniWeather_mpi_parallelfor_simd_x.cpp @@ -2,115 +2,139 @@ ////////////////////////////////////////////////////////////////////////////////////////// // miniWeather // Author: Matt Norman , Oak Ridge National Laboratory -// This code simulates dry, stratified, compressible, non-hydrostatic fluid flows -// For documentation, please see the attached documentation in the "documentation" folder +// This code simulates dry, stratified, compressible, non-hydrostatic fluid +// flows For documentation, please see the attached documentation in the +// "documentation" folder // ////////////////////////////////////////////////////////////////////////////////////////// -#include -#include -#include #include "../const.h" #include "pnetcdf.h" -#include #include +#include +#include +#include +#include -// We're going to define all arrays on the host because this doesn't use parallel_for -typedef yakl::Array real1d; -typedef yakl::Array real2d; -typedef yakl::Array real3d; -typedef yakl::Array doub1d; -typedef yakl::Array doub2d; -typedef yakl::Array doub3d; - -typedef yakl::Array realConst1d; -typedef yakl::Array realConst2d; -typedef yakl::Array realConst3d; -typedef yakl::Array doubConst1d; -typedef yakl::Array doubConst2d; -typedef yakl::Array doubConst3d; - -// Some arrays still need to be on the host, so we will explicitly create Host Array typedefs -typedef yakl::Array real1dHost; -typedef yakl::Array real2dHost; -typedef yakl::Array real3dHost; -typedef yakl::Array doub1dHost; -typedef yakl::Array doub2dHost; -typedef yakl::Array doub3dHost; +// We're going to define all arrays on the host because this doesn't use +// parallel_for +typedef yakl::Array real1d; +typedef yakl::Array 
real2d; +typedef yakl::Array real3d; +typedef yakl::Array doub1d; +typedef yakl::Array doub2d; +typedef yakl::Array doub3d; + +typedef yakl::Array realConst1d; +typedef yakl::Array realConst2d; +typedef yakl::Array realConst3d; +typedef yakl::Array doubConst1d; +typedef yakl::Array doubConst2d; +typedef yakl::Array doubConst3d; + +// Some arrays still need to be on the host, so we will explicitly create Host +// Array typedefs +typedef yakl::Array real1dHost; +typedef yakl::Array real2dHost; +typedef yakl::Array real3dHost; +typedef yakl::Array doub1dHost; +typedef yakl::Array doub2dHost; +typedef yakl::Array doub3dHost; /////////////////////////////////////////////////////////////////////////////////////// -// Variables that are initialized but remain static over the course of the simulation +// Variables that are initialized but remain static over the course of the +// simulation /////////////////////////////////////////////////////////////////////////////////////// struct Fixed_data { - int nx, nz; //Number of local grid cells in the x- and z- dimensions for this MPI task - int i_beg, k_beg; //beginning index in the x- and z-directions for this MPI task - int nranks, myrank; //Number of MPI ranks and my rank id - int left_rank, right_rank; //MPI Rank IDs that exist to my left and right in the global domain - int mainproc; //Am I the main process (rank == 0)? - realConst1d hy_dens_cell; //hydrostatic density (vert cell avgs). Dimensions: (1-hs:nz+hs) - realConst1d hy_dens_theta_cell; //hydrostatic rho*t (vert cell avgs). Dimensions: (1-hs:nz+hs) - realConst1d hy_dens_int; //hydrostatic density (vert cell interf). Dimensions: (1:nz+1) - realConst1d hy_dens_theta_int; //hydrostatic rho*t (vert cell interf). Dimensions: (1:nz+1) - realConst1d hy_pressure_int; //hydrostatic press (vert cell interf). Dimensions: (1:nz+1) + int nx, nz; // Number of local grid cells in the x- and z- dimensions for this + // MPI task + int i_beg, + k_beg; // beginning index in the x- and z-directions for this MPI task + int nranks, myrank; // Number of MPI ranks and my rank id + int left_rank, right_rank; // MPI Rank IDs that exist to my left and right in + // the global domain + int mainproc; // Am I the main process (rank == 0)? + realConst1d hy_dens_cell; // hydrostatic density (vert cell avgs). Dimensions: + // (1-hs:nz+hs) + realConst1d hy_dens_theta_cell; // hydrostatic rho*t (vert cell avgs). + // Dimensions: (1-hs:nz+hs) + realConst1d hy_dens_int; // hydrostatic density (vert cell interf). + // Dimensions: (1:nz+1) + realConst1d hy_dens_theta_int; // hydrostatic rho*t (vert cell interf). + // Dimensions: (1:nz+1) + realConst1d hy_pressure_int; // hydrostatic press (vert cell interf). 
+ // Dimensions: (1:nz+1) }; /////////////////////////////////////////////////////////////////////////////////////// // Variables that are dynamics over the course of the simulation /////////////////////////////////////////////////////////////////////////////////////// -//Declaring the functions defined after "main" -void init ( real3d &state , real &dt , Fixed_data &fixed_data ); -void finalize ( ); -YAKL_INLINE void injection ( real x , real z , real &r , real &u , real &w , real &t , real &hr , real &ht ); -YAKL_INLINE void density_current ( real x , real z , real &r , real &u , real &w , real &t , real &hr , real &ht ); -YAKL_INLINE void gravity_waves ( real x , real z , real &r , real &u , real &w , real &t , real &hr , real &ht ); -YAKL_INLINE void thermal ( real x , real z , real &r , real &u , real &w , real &t , real &hr , real &ht ); -YAKL_INLINE void collision ( real x , real z , real &r , real &u , real &w , real &t , real &hr , real &ht ); -YAKL_INLINE void hydro_const_theta ( real z , real &r , real &t ); -YAKL_INLINE void hydro_const_bvfreq ( real z , real bv_freq0 , real &r , real &t ); -YAKL_INLINE real sample_ellipse_cosine( real x , real z , real amp , real x0 , real z0 , real xrad , real zrad ); -void output ( realConst3d state , real etime , int &num_out , Fixed_data const &fixed_data ); -void ncwrap ( int ierr , int line ); -void perform_timestep ( real3d const &state , real dt , int &direction_switch , Fixed_data const &fixed_data ); -void semi_discrete_step ( realConst3d state_init , real3d const &state_forcing , real3d const &state_out , real dt , - int dir , Fixed_data const &fixed_data ); -void compute_tendencies_x ( realConst3d state , real3d const &tend , real dt , Fixed_data const &fixed_data ); -void compute_tendencies_z ( realConst3d state , real3d const &tend , real dt , Fixed_data const &fixed_data ); -void set_halo_values_x ( real3d const &state , Fixed_data const &fixed_data ); -void set_halo_values_z ( real3d const &state , Fixed_data const &fixed_data ); -void reductions ( realConst3d state , double &mass , double &te , Fixed_data const &fixed_data ); - +// Declaring the functions defined after "main" +void init(real3d &state, real &dt, Fixed_data &fixed_data); +void finalize(); +YAKL_INLINE void injection(real x, real z, real &r, real &u, real &w, real &t, + real &hr, real &ht); +YAKL_INLINE void density_current(real x, real z, real &r, real &u, real &w, + real &t, real &hr, real &ht); +YAKL_INLINE void gravity_waves(real x, real z, real &r, real &u, real &w, + real &t, real &hr, real &ht); +YAKL_INLINE void thermal(real x, real z, real &r, real &u, real &w, real &t, + real &hr, real &ht); +YAKL_INLINE void collision(real x, real z, real &r, real &u, real &w, real &t, + real &hr, real &ht); +YAKL_INLINE void hydro_const_theta(real z, real &r, real &t); +YAKL_INLINE void hydro_const_bvfreq(real z, real bv_freq0, real &r, real &t); +YAKL_INLINE real sample_ellipse_cosine(real x, real z, real amp, real x0, + real z0, real xrad, real zrad); +void output(realConst3d state, real etime, int &num_out, + Fixed_data const &fixed_data); +void ncwrap(int ierr, int line); +void perform_timestep(real3d const &state, real dt, int &direction_switch, + Fixed_data const &fixed_data); +void semi_discrete_step(realConst3d state_init, real3d const &state_forcing, + real3d const &state_out, real dt, int dir, + Fixed_data const &fixed_data); +void compute_tendencies_x(realConst3d state, real3d const &tend, real dt, + Fixed_data const &fixed_data); +void 
compute_tendencies_z(realConst3d state, real3d const &tend, real dt, + Fixed_data const &fixed_data); +void set_halo_values_x(real3d const &state, Fixed_data const &fixed_data); +void set_halo_values_z(real3d const &state, Fixed_data const &fixed_data); +void reductions(realConst3d state, double &mass, double &te, + Fixed_data const &fixed_data); /////////////////////////////////////////////////////////////////////////////////////// // THE MAIN PROGRAM STARTS HERE /////////////////////////////////////////////////////////////////////////////////////// int main(int argc, char **argv) { - MPI_Init(&argc,&argv); + MPI_Init(&argc, &argv); yakl::init(); { Fixed_data fixed_data; real3d state; - real dt; //Model time step (seconds) + real dt; // Model time step (seconds) // Init allocates the state and hydrostatic arrays hy_* - init( state , dt , fixed_data ); + init(state, dt, fixed_data); auto &mainproc = fixed_data.mainproc; - //Initial reductions for mass, kinetic energy, and total energy + // Initial reductions for mass, kinetic energy, and total energy double mass0, te0; - reductions(state,mass0,te0,fixed_data); + reductions(state, mass0, te0, fixed_data); - int num_out = 0; //The number of outputs performed so far - real output_counter = 0; //Helps determine when it's time to do output + int num_out = 0; // The number of outputs performed so far + real output_counter = 0; // Helps determine when it's time to do output real etime = 0; - //Output the initial state + // Output the initial state if (output_freq >= 0) { - output(state,etime,num_out,fixed_data); + output(state, etime, num_out, fixed_data); } - int direction_switch = 1; // Tells dimensionally split which order to take x,z solves + int direction_switch = + 1; // Tells dimensionally split which order to take x,z solves //////////////////////////////////////////////////// // MAIN TIME STEP LOOP @@ -118,36 +142,42 @@ int main(int argc, char **argv) { yakl::fence(); auto t1 = std::chrono::steady_clock::now(); while (etime < sim_time) { - //If the time step leads to exceeding the simulation time, shorten it for the last step - if (etime + dt > sim_time) { dt = sim_time - etime; } - //Perform a single time step - perform_timestep(state,dt,direction_switch,fixed_data); - //Inform the user - #ifndef NO_INFORM - if (mainproc) { printf( "Elapsed Time: %lf / %lf\n", etime , sim_time ); } - #endif - //Update the elapsed time and output counter + // If the time step leads to exceeding the simulation time, shorten it for + // the last step + if (etime + dt > sim_time) { + dt = sim_time - etime; + } + // Perform a single time step + perform_timestep(state, dt, direction_switch, fixed_data); +// Inform the user +#ifndef NO_INFORM + if (mainproc) { + printf("Elapsed Time: %lf / %lf\n", etime, sim_time); + } +#endif + // Update the elapsed time and output counter etime = etime + dt; output_counter = output_counter + dt; - //If it's time for output, reset the counter, and do output + // If it's time for output, reset the counter, and do output if (output_freq >= 0 && output_counter >= output_freq) { output_counter = output_counter - output_freq; - output(state,etime,num_out,fixed_data); + output(state, etime, num_out, fixed_data); } } yakl::fence(); auto t2 = std::chrono::steady_clock::now(); if (mainproc) { - std::cout << "CPU Time: " << std::chrono::duration(t2-t1).count() << " sec\n"; + std::cout << "CPU Time: " + << std::chrono::duration(t2 - t1).count() << " sec\n"; } - //Final reductions for mass, kinetic energy, and total energy + // Final 
reductions for mass, kinetic energy, and total energy double mass, te; - reductions(state,mass,te,fixed_data); + reductions(state, mass, te, fixed_data); if (mainproc) { - printf( "d_mass: %le\n" , (mass - mass0)/mass0 ); - printf( "d_te: %le\n" , (te - te0 )/te0 ); + printf("d_mass: %le\n", (mass - mass0) / mass0); + printf("d_te: %le\n", (te - te0) / te0); } finalize(); @@ -156,251 +186,284 @@ int main(int argc, char **argv) { MPI_Finalize(); } +// Performs a single dimensionally split time step using a simple low-storage +// three-stage Runge-Kutta time integrator The dimensional splitting is a +// second-order-accurate alternating Strang splitting in which the order of +// directions is alternated each time step. The Runge-Kutta method used here is +// defined as follows: +// q* = q_n + dt/3 * rhs(q_n) +// q** = q_n + dt/2 * rhs(q* ) +// q_n+1 = q_n + dt/1 * rhs(q**) +void perform_timestep(real3d const &state, real dt, int &direction_switch, + Fixed_data const &fixed_data) { + auto &nx = fixed_data.nx; + auto &nz = fixed_data.nz; + + real3d state_tmp("state_tmp", NUM_VARS, nz + 2 * hs, nx + 2 * hs); -//Performs a single dimensionally split time step using a simple low-storage three-stage Runge-Kutta time integrator -//The dimensional splitting is a second-order-accurate alternating Strang splitting in which the -//order of directions is alternated each time step. -//The Runge-Kutta method used here is defined as follows: -// q* = q_n + dt/3 * rhs(q_n) -// q** = q_n + dt/2 * rhs(q* ) -// q_n+1 = q_n + dt/1 * rhs(q**) -void perform_timestep( real3d const &state , real dt , int &direction_switch , Fixed_data const &fixed_data) { - auto &nx = fixed_data.nx ; - auto &nz = fixed_data.nz ; - - real3d state_tmp("state_tmp",NUM_VARS,nz+2*hs,nx+2*hs); - if (direction_switch) { - //x-direction first - semi_discrete_step( state , state , state_tmp , dt / 3 , DIR_X , fixed_data ); - semi_discrete_step( state , state_tmp , state_tmp , dt / 2 , DIR_X , fixed_data ); - semi_discrete_step( state , state_tmp , state , dt / 1 , DIR_X , fixed_data ); - //z-direction second - semi_discrete_step( state , state , state_tmp , dt / 3 , DIR_Z , fixed_data ); - semi_discrete_step( state , state_tmp , state_tmp , dt / 2 , DIR_Z , fixed_data ); - semi_discrete_step( state , state_tmp , state , dt / 1 , DIR_Z , fixed_data ); + // x-direction first + semi_discrete_step(state, state, state_tmp, dt / 3, DIR_X, fixed_data); + semi_discrete_step(state, state_tmp, state_tmp, dt / 2, DIR_X, fixed_data); + semi_discrete_step(state, state_tmp, state, dt / 1, DIR_X, fixed_data); + // z-direction second + semi_discrete_step(state, state, state_tmp, dt / 3, DIR_Z, fixed_data); + semi_discrete_step(state, state_tmp, state_tmp, dt / 2, DIR_Z, fixed_data); + semi_discrete_step(state, state_tmp, state, dt / 1, DIR_Z, fixed_data); + } else { + // z-direction second + semi_discrete_step(state, state, state_tmp, dt / 3, DIR_Z, fixed_data); + semi_discrete_step(state, state_tmp, state_tmp, dt / 2, DIR_Z, fixed_data); + semi_discrete_step(state, state_tmp, state, dt / 1, DIR_Z, fixed_data); + // x-direction first + semi_discrete_step(state, state, state_tmp, dt / 3, DIR_X, fixed_data); + semi_discrete_step(state, state_tmp, state_tmp, dt / 2, DIR_X, fixed_data); + semi_discrete_step(state, state_tmp, state, dt / 1, DIR_X, fixed_data); + } + if (direction_switch) { + direction_switch = 0; } else { - //z-direction second - semi_discrete_step( state , state , state_tmp , dt / 3 , DIR_Z , fixed_data ); - semi_discrete_step( state , 
state_tmp , state_tmp , dt / 2 , DIR_Z , fixed_data ); - semi_discrete_step( state , state_tmp , state , dt / 1 , DIR_Z , fixed_data ); - //x-direction first - semi_discrete_step( state , state , state_tmp , dt / 3 , DIR_X , fixed_data ); - semi_discrete_step( state , state_tmp , state_tmp , dt / 2 , DIR_X , fixed_data ); - semi_discrete_step( state , state_tmp , state , dt / 1 , DIR_X , fixed_data ); + direction_switch = 1; } - if (direction_switch) { direction_switch = 0; } else { direction_switch = 1; } } - -//Perform a single semi-discretized step in time with the form: -//state_out = state_init + dt * rhs(state_forcing) -//Meaning the step starts from state_init, computes the rhs using state_forcing, and stores the result in state_out -void semi_discrete_step( realConst3d state_init , real3d const &state_forcing , real3d const &state_out , real dt , int dir , Fixed_data const &fixed_data ) { - auto &nx = fixed_data.nx ; - auto &nz = fixed_data.nz ; - auto &i_beg = fixed_data.i_beg ; - auto &k_beg = fixed_data.k_beg ; - auto &hy_dens_cell = fixed_data.hy_dens_cell ; - - real3d tend("tend",NUM_VARS,nz,nx); - - if (dir == DIR_X) { - //Set the halo values for this MPI task's fluid state in the x-direction +// Perform a single semi-discretized step in time with the form: +// state_out = state_init + dt * rhs(state_forcing) +// Meaning the step starts from state_init, computes the rhs using +// state_forcing, and stores the result in state_out +void semi_discrete_step(realConst3d state_init, real3d const &state_forcing, + real3d const &state_out, real dt, int dir, + Fixed_data const &fixed_data) { + auto &nx = fixed_data.nx; + auto &nz = fixed_data.nz; + auto &i_beg = fixed_data.i_beg; + auto &k_beg = fixed_data.k_beg; + auto &hy_dens_cell = fixed_data.hy_dens_cell; + + real3d tend("tend", NUM_VARS, nz, nx); + + if (dir == DIR_X) { + // Set the halo values for this MPI task's fluid state in the x-direction yakl::timer_start("halo x"); - set_halo_values_x(state_forcing,fixed_data); + set_halo_values_x(state_forcing, fixed_data); yakl::timer_stop("halo x"); - //Compute the time tendencies for the fluid state in the x-direction + // Compute the time tendencies for the fluid state in the x-direction yakl::timer_start("tendencies x"); - compute_tendencies_x(state_forcing,tend,dt,fixed_data); + compute_tendencies_x(state_forcing, tend, dt, fixed_data); yakl::timer_stop("tendencies x"); } else if (dir == DIR_Z) { - //Set the halo values for this MPI task's fluid state in the z-direction + // Set the halo values for this MPI task's fluid state in the z-direction yakl::timer_start("halo z"); - set_halo_values_z(state_forcing,fixed_data); + set_halo_values_z(state_forcing, fixed_data); yakl::timer_stop("halo z"); - //Compute the time tendencies for the fluid state in the z-direction + // Compute the time tendencies for the fluid state in the z-direction yakl::timer_start("tendencies z"); - compute_tendencies_z(state_forcing,tend,dt,fixed_data); + compute_tendencies_z(state_forcing, tend, dt, fixed_data); yakl::timer_stop("tendencies z"); } - //Apply the tendencies to the fluid state - // for (ll=0; ll(NUM_VARS,nz,nx) , YAKL_LAMBDA ( int ll, int k, int i ) { - if (data_spec_int == DATA_SPEC_GRAVITY_WAVES) { - real x = (i_beg + i+0.5)*dx; - real z = (k_beg + k+0.5)*dz; - real wpert = sample_ellipse_cosine( x,z , 0.01 , xlen/8,1000., 500.,500. 
); - tend(ID_WMOM,k,i) += wpert*hy_dens_cell(hs+k); - } - state_out(ll,hs+k,hs+i) = state_init(ll,hs+k,hs+i) + dt * tend(ll,k,i); - }); + parallel_for( + SimpleBounds<3>(NUM_VARS, nz, nx), YAKL_LAMBDA(int ll, int k, int i) { + if (data_spec_int == DATA_SPEC_GRAVITY_WAVES) { + real x = (i_beg + i + 0.5) * dx; + real z = (k_beg + k + 0.5) * dz; + real wpert = + sample_ellipse_cosine(x, z, 0.01, xlen / 8, 1000., 500., 500.); + tend(ID_WMOM, k, i) += wpert * hy_dens_cell(hs + k); + } + state_out(ll, hs + k, hs + i) = + state_init(ll, hs + k, hs + i) + dt * tend(ll, k, i); + }); yakl::timer_stop("apply tendencies"); } - -//Compute the time tendencies of the fluid state using forcing in the x-direction -//Since the halos are set in a separate routine, this will not require MPI -//First, compute the flux vector at each cell interface in the x-direction (including hyperviscosity) -//Then, compute the tendencies using those fluxes -void compute_tendencies_x( realConst3d state , real3d const &tend , real dt , Fixed_data const &fixed_data ) { - auto &nx = fixed_data.nx ; - auto &nz = fixed_data.nz ; - auto &hy_dens_cell = fixed_data.hy_dens_cell ; +// Compute the time tendencies of the fluid state using forcing in the +// x-direction Since the halos are set in a separate routine, this will not +// require MPI First, compute the flux vector at each cell interface in the +// x-direction (including hyperviscosity) Then, compute the tendencies using +// those fluxes +void compute_tendencies_x(realConst3d state, real3d const &tend, real dt, + Fixed_data const &fixed_data) { + auto &nx = fixed_data.nx; + auto &nz = fixed_data.nz; + auto &hy_dens_cell = fixed_data.hy_dens_cell; auto &hy_dens_theta_cell = fixed_data.hy_dens_theta_cell; - real3d flux("flux",NUM_VARS,nz,nx+1); + real3d flux("flux", NUM_VARS, nz, nx + 1); + + // Compute fluxes in the x-direction for each cell + // for (k=0; k(nz, xblocks), YAKL_LAMBDA(int k, int iblk) { + SArray, 1, 4> stencil; + SArray, 1, NUM_VARS> d3_vals; + SArray, 1, NUM_VARS> vals; + // Compute the hyperviscosity coefficient + real hv_coef = -hv_beta * dx / (16 * dt); + + // Use fourth-order interpolation from four cell averages to compute the + // value at the interface in question + for (int ll = 0; ll < NUM_VARS; ll++) { + for (int s = 0; s < sten_size; s++) { + iterate_over_pack( + [&](unsigned int ilane) { + int i = std::min(xdim - 1, iblk * simd_len + ilane); + stencil(s)(ilane) = state(ll, hs + k, i + s); + }, + PackIterConfig()); + } + // Fourth-order-accurate interpolation of the state + vals(ll) = -stencil(0) / 12 + 7 * stencil(1) / 12 + + 7 * stencil(2) / 12 - stencil(3) / 12; + // First-order-accurate interpolation of the third spatial derivative + // of the state (for artificial viscosity) + d3_vals(ll) = + -stencil(0) + 3 * stencil(1) - 3 * stencil(2) + stencil(3); + } - //Compute fluxes in the x-direction for each cell - // for (k=0; k(nz,xblocks) , YAKL_LAMBDA (int k, int iblk) { - SArray,1,4> stencil; - SArray,1,NUM_VARS> d3_vals; - SArray,1,NUM_VARS> vals; - //Compute the hyperviscosity coefficient - real hv_coef = -hv_beta * dx / (16*dt); - - //Use fourth-order interpolation from four cell averages to compute the value at the interface in question - for (int ll=0; ll() ); - } - //Fourth-order-accurate interpolation of the state - vals(ll) = -stencil(0)/12 + 7*stencil(1)/12 + 7*stencil(2)/12 - stencil(3)/12; - //First-order-accurate interpolation of the third spatial derivative of the state (for artificial viscosity) - d3_vals(ll) = -stencil(0) + 
3*stencil(1) - 3*stencil(2) + stencil(3); - } + // Compute density, u-wind, w-wind, potential temperature, and pressure + // (r,u,w,t,p respectively) + auto r = vals(ID_DENS) + hy_dens_cell(hs + k); + auto u = vals(ID_UMOM) / r; + auto w = vals(ID_WMOM) / r; + auto t = (vals(ID_RHOT) + hy_dens_theta_cell(hs + k)) / r; + auto p = C0 * pow((r * t), gamm); + + auto f1 = r * u - hv_coef * d3_vals(ID_DENS); + auto f2 = r * u * u + p - hv_coef * d3_vals(ID_UMOM); + auto f3 = r * u * w - hv_coef * d3_vals(ID_WMOM); + auto f4 = r * u * t - hv_coef * d3_vals(ID_RHOT); + + // Compute the flux vector + iterate_over_pack( + [&](unsigned int ilane) { + int i = std::min(xdim - 1, iblk * simd_len + ilane); + flux(ID_DENS, k, i) = f1(ilane); + flux(ID_UMOM, k, i) = f2(ilane); + flux(ID_WMOM, k, i) = f3(ilane); + flux(ID_RHOT, k, i) = f4(ilane); + }, + PackIterConfig()); + }); - //Compute density, u-wind, w-wind, potential temperature, and pressure (r,u,w,t,p respectively) - auto r = vals(ID_DENS) + hy_dens_cell(hs+k); - auto u = vals(ID_UMOM) / r; - auto w = vals(ID_WMOM) / r; - auto t = ( vals(ID_RHOT) + hy_dens_theta_cell(hs+k) ) / r; - auto p = C0*pow((r*t),gamm); - - auto f1 = r*u - hv_coef*d3_vals(ID_DENS); - auto f2 = r*u*u+p - hv_coef*d3_vals(ID_UMOM); - auto f3 = r*u*w - hv_coef*d3_vals(ID_WMOM); - auto f4 = r*u*t - hv_coef*d3_vals(ID_RHOT); - - //Compute the flux vector - iterate_over_pack( [&] (unsigned int ilane) { - int i = std::min(xdim-1 , iblk*simd_len + ilane); - flux(ID_DENS,k,i) = f1(ilane); - flux(ID_UMOM,k,i) = f2(ilane); - flux(ID_WMOM,k,i) = f3(ilane); - flux(ID_RHOT,k,i) = f4(ilane); - } , PackIterConfig() ); - }); - - //Use the fluxes to compute tendencies for each cell - // for (ll=0; ll(NUM_VARS,nz,nx) , YAKL_LAMBDA ( int ll, int k, int i ) { - tend(ll,k,i) = -( flux(ll,k,i+1) - flux(ll,k,i) ) / dx; - }); + // Use the fluxes to compute tendencies for each cell + // for (ll=0; ll(NUM_VARS, nz, nx), YAKL_LAMBDA(int ll, int k, int i) { + tend(ll, k, i) = -(flux(ll, k, i + 1) - flux(ll, k, i)) / dx; + }); } - -//Compute the time tendencies of the fluid state using forcing in the z-direction -//Since the halos are set in a separate routine, this will not require MPI -//First, compute the flux vector at each cell interface in the z-direction (including hyperviscosity) -//Then, compute the tendencies using those fluxes -void compute_tendencies_z( realConst3d state , real3d const &tend , real dt , Fixed_data const &fixed_data ) { - auto &nx = fixed_data.nx ; - auto &nz = fixed_data.nz ; - auto &hy_dens_int = fixed_data.hy_dens_int ; - auto &hy_dens_theta_int = fixed_data.hy_dens_theta_int ; - auto &hy_pressure_int = fixed_data.hy_pressure_int ; - - real3d flux("flux",NUM_VARS,nz+1,nx); - - //Compute fluxes in the x-direction for each cell - // for (k=0; k(nz+1,xblocks) , YAKL_LAMBDA (int k, int iblk) { - SArray,1,4> stencil; - SArray,1,NUM_VARS> d3_vals; - SArray,1,NUM_VARS> vals; - //Compute the hyperviscosity coefficient - real hv_coef = -hv_beta * dz / (16*dt); - - //Use fourth-order interpolation from four cell averages to compute the value at the interface in question - for (int ll=0; ll() ); - } - //Fourth-order-accurate interpolation of the state - vals(ll) = -stencil(0)/12 + 7*stencil(1)/12 + 7*stencil(2)/12 - stencil(3)/12; - //First-order-accurate interpolation of the third spatial derivative of the state - d3_vals(ll) = -stencil(0) + 3*stencil(1) - 3*stencil(2) + stencil(3); - } - - //Compute density, u-wind, w-wind, potential temperature, and pressure (r,u,w,t,p respectively) 
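[Editor's note] To summarize the reconstruction both tendency routines in this patch perform: a fourth-order interface value and a first-order third-derivative estimate are built from a four-cell stencil, and the latter feeds the hyperviscosity term. A scalar sketch (function names are ours; the kernels above operate on SIMD packs):

// Fourth-order interface value from four cell averages s0..s3 (matches vals(ll) above).
inline double interp4(double s0, double s1, double s2, double s3) {
  return -s0 / 12 + 7 * s1 / 12 + 7 * s2 / 12 - s3 / 12;
}
// First-order third-derivative estimate used for artificial viscosity (matches d3_vals(ll)).
inline double d3(double s0, double s1, double s2, double s3) {
  return -s0 + 3 * s1 - 3 * s2 + s3;
}
// Each flux component is then physical_flux - hv_coef * d3, with
// hv_coef = -hv_beta * (dx or dz, depending on direction) / (16 * dt).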
- auto r = vals(ID_DENS) + hy_dens_int(k); - auto u = vals(ID_UMOM) / r; - auto w = vals(ID_WMOM) / r; - auto t = ( vals(ID_RHOT) + hy_dens_theta_int(k) ) / r; - auto p = C0*pow((r*t),gamm) - hy_pressure_int(k); - if (k == 0 || k == nz) { - w = 0; - d3_vals(ID_DENS) = 0; - } + unsigned int xblocks = (xdim - 1) / simd_len + 1; + parallel_for( + SimpleBounds<2>(nz + 1, xblocks), YAKL_LAMBDA(int k, int iblk) { + SArray, 1, 4> stencil; + SArray, 1, NUM_VARS> d3_vals; + SArray, 1, NUM_VARS> vals; + // Compute the hyperviscosity coefficient + real hv_coef = -hv_beta * dz / (16 * dt); + + // Use fourth-order interpolation from four cell averages to compute the + // value at the interface in question + for (int ll = 0; ll < NUM_VARS; ll++) { + for (int s = 0; s < sten_size; s++) { + iterate_over_pack( + [&](unsigned int ilane) { + int i = min(xdim - 1, iblk * simd_len + ilane); + stencil(s)(ilane) = state(ll, k + s, hs + i); + }, + PackIterConfig()); + } + // Fourth-order-accurate interpolation of the state + vals(ll) = -stencil(0) / 12 + 7 * stencil(1) / 12 + + 7 * stencil(2) / 12 - stencil(3) / 12; + // First-order-accurate interpolation of the third spatial derivative + // of the state + d3_vals(ll) = + -stencil(0) + 3 * stencil(1) - 3 * stencil(2) + stencil(3); + } - auto f1 = r*w - hv_coef*d3_vals(ID_DENS); - auto f2 = r*w*u - hv_coef*d3_vals(ID_UMOM); - auto f3 = r*w*w+p - hv_coef*d3_vals(ID_WMOM); - auto f4 = r*w*t - hv_coef*d3_vals(ID_RHOT); - - //Compute the flux vector with hyperviscosity - iterate_over_pack( [&] (unsigned int ilane) { - int i = min( xdim-1 , iblk*simd_len + ilane ); - flux(ID_DENS,k,i) = f1(ilane); - flux(ID_UMOM,k,i) = f2(ilane); - flux(ID_WMOM,k,i) = f3(ilane); - flux(ID_RHOT,k,i) = f4(ilane); - } , PackIterConfig() ); - }); - - //Use the fluxes to compute tendencies for each cell - // for (ll=0; ll(NUM_VARS,nz,nx) , YAKL_LAMBDA ( int ll, int k, int i ) { - tend(ll,k,i) = -( flux(ll,k+1,i) - flux(ll,k,i) ) / dz; - if (ll == ID_WMOM) { - tend(ll,k,i) -= state(ID_DENS,hs+k,hs+i)*grav; - } - }); -} + // Compute density, u-wind, w-wind, potential temperature, and pressure + // (r,u,w,t,p respectively) + auto r = vals(ID_DENS) + hy_dens_int(k); + auto u = vals(ID_UMOM) / r; + auto w = vals(ID_WMOM) / r; + auto t = (vals(ID_RHOT) + hy_dens_theta_int(k)) / r; + auto p = C0 * pow((r * t), gamm) - hy_pressure_int(k); + if (k == 0 || k == nz) { + w = 0; + d3_vals(ID_DENS) = 0; + } + auto f1 = r * w - hv_coef * d3_vals(ID_DENS); + auto f2 = r * w * u - hv_coef * d3_vals(ID_UMOM); + auto f3 = r * w * w + p - hv_coef * d3_vals(ID_WMOM); + auto f4 = r * w * t - hv_coef * d3_vals(ID_RHOT); + + // Compute the flux vector with hyperviscosity + iterate_over_pack( + [&](unsigned int ilane) { + int i = min(xdim - 1, iblk * simd_len + ilane); + flux(ID_DENS, k, i) = f1(ilane); + flux(ID_UMOM, k, i) = f2(ilane); + flux(ID_WMOM, k, i) = f3(ilane); + flux(ID_RHOT, k, i) = f4(ilane); + }, + PackIterConfig()); + }); + // Use the fluxes to compute tendencies for each cell + // for (ll=0; ll(NUM_VARS, nz, nx), YAKL_LAMBDA(int ll, int k, int i) { + tend(ll, k, i) = -(flux(ll, k + 1, i) - flux(ll, k, i)) / dz; + if (ll == ID_WMOM) { + tend(ll, k, i) -= state(ID_DENS, hs + k, hs + i) * grav; + } + }); +} -//Set this MPI task's halo values in the x-direction. 
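Editor's note: the z-direction tendency above combines a vertical flux divergence with a gravity source on the w-momentum equation. A minimal plain-C++ sketch of that combination for one column follows; the names (wmom_tendency, flux, dens) and the sample values are placeholders, not project code.

#include <cstdio>
#include <vector>

// Sketch: w-momentum tendency for one column. 'flux' holds nz+1 interface
// values, 'dens' the cell-average density perturbation.
void wmom_tendency(const std::vector<double> &flux,
                   const std::vector<double> &dens, double dz, double grav,
                   std::vector<double> &tend) {
  int nz = (int)dens.size();
  for (int k = 0; k < nz; k++) {
    tend[k] = -(flux[k + 1] - flux[k]) / dz; // flux divergence
    tend[k] -= dens[k] * grav;               // gravity source term
  }
}

int main() {
  const int nz = 4;
  std::vector<double> flux(nz + 1, 1.0), dens(nz, 0.01), tend(nz);
  flux[2] = 1.5; // make one interface differ so the divergence is visible
  wmom_tendency(flux, dens, 100., 9.8, tend);
  for (double t : tend) printf("%g ", t);
  printf("\n");
}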
This routine will require MPI -void set_halo_values_x( real3d const &state , Fixed_data const &fixed_data ) { - auto &nx = fixed_data.nx ; - auto &nz = fixed_data.nz ; - auto &k_beg = fixed_data.k_beg ; - auto &left_rank = fixed_data.left_rank ; - auto &right_rank = fixed_data.right_rank ; - auto &myrank = fixed_data.myrank ; - auto &hy_dens_cell = fixed_data.hy_dens_cell ; +// Set this MPI task's halo values in the x-direction. This routine will require +// MPI +void set_halo_values_x(real3d const &state, Fixed_data const &fixed_data) { + auto &nx = fixed_data.nx; + auto &nz = fixed_data.nz; + auto &k_beg = fixed_data.k_beg; + auto &left_rank = fixed_data.left_rank; + auto &right_rank = fixed_data.right_rank; + auto &myrank = fixed_data.myrank; + auto &hy_dens_cell = fixed_data.hy_dens_cell; auto &hy_dens_theta_cell = fixed_data.hy_dens_theta_cell; int ierr; @@ -408,36 +471,52 @@ void set_halo_values_x( real3d const &state , Fixed_data const &fixed_data ) { if (fixed_data.nranks == 1) { - parallel_for( SimpleBounds<2>(NUM_VARS,nz) , YAKL_LAMBDA (int ll, int k) { - state(ll,hs+k,0 ) = state(ll,hs+k,nx+hs-2); - state(ll,hs+k,1 ) = state(ll,hs+k,nx+hs-1); - state(ll,hs+k,nx+hs ) = state(ll,hs+k,hs ); - state(ll,hs+k,nx+hs+1) = state(ll,hs+k,hs+1 ); - }); + parallel_for( + SimpleBounds<2>(NUM_VARS, nz), YAKL_LAMBDA(int ll, int k) { + state(ll, hs + k, 0) = state(ll, hs + k, nx + hs - 2); + state(ll, hs + k, 1) = state(ll, hs + k, nx + hs - 1); + state(ll, hs + k, nx + hs) = state(ll, hs + k, hs); + state(ll, hs + k, nx + hs + 1) = state(ll, hs + k, hs + 1); + }); } else { - real3d sendbuf_l ( "sendbuf_l" , NUM_VARS,nz,hs ); //Buffer to send data to the left MPI rank - real3d sendbuf_r ( "sendbuf_r" , NUM_VARS,nz,hs ); //Buffer to send data to the right MPI rank - real3d recvbuf_l ( "recvbuf_l" , NUM_VARS,nz,hs ); //Buffer to receive data from the left MPI rank - real3d recvbuf_r ( "recvbuf_r" , NUM_VARS,nz,hs ); //Buffer to receive data from the right MPI rank - real3dHost sendbuf_l_cpu( "sendbuf_l" , NUM_VARS,nz,hs ); //Buffer to send data to the left MPI rank (CPU copy) - real3dHost sendbuf_r_cpu( "sendbuf_r" , NUM_VARS,nz,hs ); //Buffer to send data to the right MPI rank (CPU copy) - real3dHost recvbuf_l_cpu( "recvbuf_l" , NUM_VARS,nz,hs ); //Buffer to receive data from the left MPI rank (CPU copy) - real3dHost recvbuf_r_cpu( "recvbuf_r" , NUM_VARS,nz,hs ); //Buffer to receive data from the right MPI rank (CPU copy) - - //Prepost receives - ierr = MPI_Irecv(recvbuf_l_cpu.data(),hs*nz*NUM_VARS,mpi_type, left_rank,0,MPI_COMM_WORLD,&req_r[0]); - ierr = MPI_Irecv(recvbuf_r_cpu.data(),hs*nz*NUM_VARS,mpi_type,right_rank,1,MPI_COMM_WORLD,&req_r[1]); - - //Pack the send buffers - // for (ll=0; ll(NUM_VARS,nz,hs) , YAKL_LAMBDA (int ll, int k, int s) { - sendbuf_l(ll,k,s) = state(ll,k+hs,hs+s); - sendbuf_r(ll,k,s) = state(ll,k+hs,nx+s); - }); + real3d sendbuf_l("sendbuf_l", NUM_VARS, nz, + hs); // Buffer to send data to the left MPI rank + real3d sendbuf_r("sendbuf_r", NUM_VARS, nz, + hs); // Buffer to send data to the right MPI rank + real3d recvbuf_l("recvbuf_l", NUM_VARS, nz, + hs); // Buffer to receive data from the left MPI rank + real3d recvbuf_r("recvbuf_r", NUM_VARS, nz, + hs); // Buffer to receive data from the right MPI rank + real3dHost sendbuf_l_cpu( + "sendbuf_l", NUM_VARS, nz, + hs); // Buffer to send data to the left MPI rank (CPU copy) + real3dHost sendbuf_r_cpu( + "sendbuf_r", NUM_VARS, nz, + hs); // Buffer to send data to the right MPI rank (CPU copy) + real3dHost recvbuf_l_cpu( + 
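Editor's note: the single-rank branch above fills the x-halo by periodic wrap with a halo width of hs = 2. A self-contained plain-C++ sketch of that indexing on one row follows; 'row' stands in for one (variable, level) slice of the real state array, and nx is arbitrary.

#include <cstdio>

int main() {
  const int hs = 2, nx = 8;
  double row[nx + 2 * hs];
  for (int i = 0; i < nx; i++) row[hs + i] = i; // interior cells 0..nx-1
  row[0] = row[nx + hs - 2];                    // left ghosts wrap around
  row[1] = row[nx + hs - 1];
  row[nx + hs] = row[hs];                       // right ghosts wrap around
  row[nx + hs + 1] = row[hs + 1];
  for (int i = 0; i < nx + 2 * hs; i++) printf("%g ", row[i]);
  printf("\n");
}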
"recvbuf_l", NUM_VARS, nz, + hs); // Buffer to receive data from the left MPI rank (CPU copy) + real3dHost recvbuf_r_cpu( + "recvbuf_r", NUM_VARS, nz, + hs); // Buffer to receive data from the right MPI rank (CPU copy) + + // Prepost receives + ierr = MPI_Irecv(recvbuf_l_cpu.data(), hs * nz * NUM_VARS, mpi_type, + left_rank, 0, MPI_COMM_WORLD, &req_r[0]); + ierr = MPI_Irecv(recvbuf_r_cpu.data(), hs * nz * NUM_VARS, mpi_type, + right_rank, 1, MPI_COMM_WORLD, &req_r[1]); + + // Pack the send buffers + // for (ll=0; ll(NUM_VARS, nz, hs), YAKL_LAMBDA(int ll, int k, int s) { + sendbuf_l(ll, k, s) = state(ll, k + hs, hs + s); + sendbuf_r(ll, k, s) = state(ll, k + hs, nx + s); + }); yakl::fence(); // This will copy from GPU to host @@ -445,126 +524,140 @@ void set_halo_values_x( real3d const &state , Fixed_data const &fixed_data ) { sendbuf_r.deep_copy_to(sendbuf_r_cpu); yakl::fence(); - //Fire off the sends - ierr = MPI_Isend(sendbuf_l_cpu.data(),hs*nz*NUM_VARS,mpi_type, left_rank,1,MPI_COMM_WORLD,&req_s[0]); - ierr = MPI_Isend(sendbuf_r_cpu.data(),hs*nz*NUM_VARS,mpi_type,right_rank,0,MPI_COMM_WORLD,&req_s[1]); + // Fire off the sends + ierr = MPI_Isend(sendbuf_l_cpu.data(), hs * nz * NUM_VARS, mpi_type, + left_rank, 1, MPI_COMM_WORLD, &req_s[0]); + ierr = MPI_Isend(sendbuf_r_cpu.data(), hs * nz * NUM_VARS, mpi_type, + right_rank, 0, MPI_COMM_WORLD, &req_s[1]); - //Wait for receives to finish - ierr = MPI_Waitall(2,req_r,MPI_STATUSES_IGNORE); + // Wait for receives to finish + ierr = MPI_Waitall(2, req_r, MPI_STATUSES_IGNORE); // This will copy from host to GPU recvbuf_l_cpu.deep_copy_to(recvbuf_l); recvbuf_r_cpu.deep_copy_to(recvbuf_r); yakl::fence(); - //Unpack the receive buffers - // for (ll=0; ll(NUM_VARS,nz,hs) , YAKL_LAMBDA (int ll, int k, int s) { - state(ll,k+hs,s ) = recvbuf_l(ll,k,s); - state(ll,k+hs,nx+hs+s) = recvbuf_r(ll,k,s); - }); + // Unpack the receive buffers + // for (ll=0; ll(NUM_VARS, nz, hs), YAKL_LAMBDA(int ll, int k, int s) { + state(ll, k + hs, s) = recvbuf_l(ll, k, s); + state(ll, k + hs, nx + hs + s) = recvbuf_r(ll, k, s); + }); yakl::fence(); - //Wait for sends to finish - ierr = MPI_Waitall(2,req_s,MPI_STATUSES_IGNORE); - + // Wait for sends to finish + ierr = MPI_Waitall(2, req_s, MPI_STATUSES_IGNORE); } if (data_spec_int == DATA_SPEC_INJECTION) { if (myrank == 0) { // for (k=0; k(nz,hs) , YAKL_LAMBDA (int k, int i) { - double z = (k_beg + k+0.5)*dz; - if (abs(z-3*zlen/4) <= zlen/16) { - state(ID_UMOM,hs+k,i) = (state(ID_DENS,hs+k,i)+hy_dens_cell(hs+k)) * 50.; - state(ID_RHOT,hs+k,i) = (state(ID_DENS,hs+k,i)+hy_dens_cell(hs+k)) * 298. - hy_dens_theta_cell(hs+k); - } - }); + parallel_for( + SimpleBounds<2>(nz, hs), YAKL_LAMBDA(int k, int i) { + double z = (k_beg + k + 0.5) * dz; + if (abs(z - 3 * zlen / 4) <= zlen / 16) { + state(ID_UMOM, hs + k, i) = + (state(ID_DENS, hs + k, i) + hy_dens_cell(hs + k)) * 50.; + state(ID_RHOT, hs + k, i) = + (state(ID_DENS, hs + k, i) + hy_dens_cell(hs + k)) * 298. - + hy_dens_theta_cell(hs + k); + } + }); } } } +// Set this MPI task's halo values in the z-direction. This does not require MPI +// because there is no MPI decomposition in the vertical direction +void set_halo_values_z(real3d const &state, Fixed_data const &fixed_data) { + auto &nx = fixed_data.nx; + auto &nz = fixed_data.nz; + auto &hy_dens_cell = fixed_data.hy_dens_cell; -//Set this MPI task's halo values in the z-direction. 
This does not require MPI because there is no MPI -//decomposition in the vertical direction -void set_halo_values_z( real3d const &state , Fixed_data const &fixed_data ) { - auto &nx = fixed_data.nx ; - auto &nz = fixed_data.nz ; - auto &hy_dens_cell = fixed_data.hy_dens_cell ; - // for (ll=0; ll(NUM_VARS,nx+2*hs) , YAKL_LAMBDA (int ll, int i) { - if (ll == ID_WMOM) { - state(ll,0 ,i) = 0.; - state(ll,1 ,i) = 0.; - state(ll,nz+hs ,i) = 0.; - state(ll,nz+hs+1,i) = 0.; - } else if (ll == ID_UMOM) { - state(ll,0 ,i) = state(ll,hs ,i) / hy_dens_cell(hs ) * hy_dens_cell(0 ); - state(ll,1 ,i) = state(ll,hs ,i) / hy_dens_cell(hs ) * hy_dens_cell(1 ); - state(ll,nz+hs ,i) = state(ll,nz+hs-1,i) / hy_dens_cell(nz+hs-1) * hy_dens_cell(nz+hs ); - state(ll,nz+hs+1,i) = state(ll,nz+hs-1,i) / hy_dens_cell(nz+hs-1) * hy_dens_cell(nz+hs+1); - } else { - state(ll,0 ,i) = state(ll,hs ,i); - state(ll,1 ,i) = state(ll,hs ,i); - state(ll,nz+hs ,i) = state(ll,nz+hs-1,i); - state(ll,nz+hs+1,i) = state(ll,nz+hs-1,i); - } - }); + parallel_for( + SimpleBounds<2>(NUM_VARS, nx + 2 * hs), YAKL_LAMBDA(int ll, int i) { + if (ll == ID_WMOM) { + state(ll, 0, i) = 0.; + state(ll, 1, i) = 0.; + state(ll, nz + hs, i) = 0.; + state(ll, nz + hs + 1, i) = 0.; + } else if (ll == ID_UMOM) { + state(ll, 0, i) = + state(ll, hs, i) / hy_dens_cell(hs) * hy_dens_cell(0); + state(ll, 1, i) = + state(ll, hs, i) / hy_dens_cell(hs) * hy_dens_cell(1); + state(ll, nz + hs, i) = state(ll, nz + hs - 1, i) / + hy_dens_cell(nz + hs - 1) * + hy_dens_cell(nz + hs); + state(ll, nz + hs + 1, i) = state(ll, nz + hs - 1, i) / + hy_dens_cell(nz + hs - 1) * + hy_dens_cell(nz + hs + 1); + } else { + state(ll, 0, i) = state(ll, hs, i); + state(ll, 1, i) = state(ll, hs, i); + state(ll, nz + hs, i) = state(ll, nz + hs - 1, i); + state(ll, nz + hs + 1, i) = state(ll, nz + hs - 1, i); + } + }); } +void init(real3d &state, real &dt, Fixed_data &fixed_data) { + auto &nx = fixed_data.nx; + auto &nz = fixed_data.nz; + auto &i_beg = fixed_data.i_beg; + auto &k_beg = fixed_data.k_beg; + auto &left_rank = fixed_data.left_rank; + auto &right_rank = fixed_data.right_rank; + auto &nranks = fixed_data.nranks; + auto &myrank = fixed_data.myrank; + auto &mainproc = fixed_data.mainproc; + int ierr; -void init( real3d &state , real &dt , Fixed_data &fixed_data ) { - auto &nx = fixed_data.nx ; - auto &nz = fixed_data.nz ; - auto &i_beg = fixed_data.i_beg ; - auto &k_beg = fixed_data.k_beg ; - auto &left_rank = fixed_data.left_rank ; - auto &right_rank = fixed_data.right_rank ; - auto &nranks = fixed_data.nranks ; - auto &myrank = fixed_data.myrank ; - auto &mainproc = fixed_data.mainproc ; - int ierr; - - ierr = MPI_Comm_size(MPI_COMM_WORLD,&nranks); - ierr = MPI_Comm_rank(MPI_COMM_WORLD,&myrank); - real nper = ( (double) nx_glob ) / nranks; - i_beg = round( nper* (myrank) ); - int i_end = round( nper*((myrank)+1) )-1; + ierr = MPI_Comm_size(MPI_COMM_WORLD, &nranks); + ierr = MPI_Comm_rank(MPI_COMM_WORLD, &myrank); + real nper = ((double)nx_glob) / nranks; + i_beg = round(nper * (myrank)); + int i_end = round(nper * ((myrank) + 1)) - 1; nx = i_end - i_beg + 1; - left_rank = myrank - 1; - if (left_rank == -1) left_rank = nranks-1; + left_rank = myrank - 1; + if (left_rank == -1) + left_rank = nranks - 1; right_rank = myrank + 1; - if (right_rank == nranks) right_rank = 0; + if (right_rank == nranks) + right_rank = 0; - //Vertical direction isn't MPI-ized, so the rank's local values = the global values + // Vertical direction isn't MPI-ized, so the rank's local values = the 
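Editor's note: init() above splits the global x-dimension into nearly equal contiguous slices per rank with periodic neighbors. A small plain-C++ sketch of that bookkeeping follows; nx_glob and nranks here are arbitrary sample values.

#include <cmath>
#include <cstdio>

int main() {
  int nx_glob = 100, nranks = 3; // sample sizes only
  for (int myrank = 0; myrank < nranks; myrank++) {
    double nper = (double)nx_glob / nranks;
    int i_beg = (int)round(nper * myrank);
    int i_end = (int)round(nper * (myrank + 1)) - 1;
    int left = (myrank - 1 + nranks) % nranks;  // periodic left neighbor
    int right = (myrank + 1) % nranks;          // periodic right neighbor
    printf("rank %d: cells %d..%d (%d cells), neighbors %d/%d\n", myrank,
           i_beg, i_end, i_end - i_beg + 1, left, right);
  }
}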
global + // values k_beg = 0; nz = nz_glob; mainproc = (myrank == 0); - //Allocate the model data - state = real3d( "state" , NUM_VARS,nz+2*hs,nx+2*hs); + // Allocate the model data + state = real3d("state", NUM_VARS, nz + 2 * hs, nx + 2 * hs); - //Define the maximum stable time step based on an assumed maximum wind speed - dt = min(dx,dz) / max_speed * cfl; + // Define the maximum stable time step based on an assumed maximum wind speed + dt = min(dx, dz) / max_speed * cfl; - //If I'm the main process in MPI, display some grid information + // If I'm the main process in MPI, display some grid information if (mainproc) { - printf( "nx_glob, nz_glob: %d %d\n", nx_glob, nz_glob); - printf( "dx,dz: %lf %lf\n",dx,dz); - printf( "dt: %lf\n",dt); + printf("nx_glob, nz_glob: %d %d\n", nx_glob, nz_glob); + printf("dx,dz: %lf %lf\n", dx, dz); + printf("dt: %lf\n", dt); } - //Want to make sure this info is displayed before further output + // Want to make sure this info is displayed before further output ierr = MPI_Barrier(MPI_COMM_WORLD); // Define quadrature weights and points const int nqpoints = 3; - SArray qpoints; - SArray qweights; + SArray qpoints; + SArray qweights; qpoints(0) = 0.112701665379258311482073460022; qpoints(1) = 0.500000000000000000000000000000; @@ -579,333 +672,412 @@ void init( real3d &state , real &dt , Fixed_data &fixed_data ) { ////////////////////////////////////////////////////////////////////////// // for (k=0; k(nz+2*hs,nx+2*hs) , YAKL_LAMBDA (int k, int i) { - //Initialize the state to zero - for (int ll=0; ll(nz + 2 * hs, nx + 2 * hs), YAKL_LAMBDA(int k, int i) { + // Initialize the state to zero + for (int ll = 0; ll < NUM_VARS; ll++) { + state(ll, k, i) = 0.; + } + // Use Gauss-Legendre quadrature to initialize a hydrostatic balance + + // temperature perturbation + for (int kk = 0; kk < nqpoints; kk++) { + for (int ii = 0; ii < nqpoints; ii++) { + // Compute the x,z location within the global domain based on cell + // and quadrature index + real x = (i_beg + i - hs + 0.5) * dx + (qpoints(ii) - 0.5) * dx; + real z = (k_beg + k - hs + 0.5) * dz + (qpoints(kk) - 0.5) * dz; + real r, u, w, t, hr, ht; + + // Set the fluid state based on the user's specification + if (data_spec_int == DATA_SPEC_COLLISION) { + collision(x, z, r, u, w, t, hr, ht); + } + if (data_spec_int == DATA_SPEC_THERMAL) { + thermal(x, z, r, u, w, t, hr, ht); + } + if (data_spec_int == DATA_SPEC_GRAVITY_WAVES) { + gravity_waves(x, z, r, u, w, t, hr, ht); + } + if (data_spec_int == DATA_SPEC_DENSITY_CURRENT) { + density_current(x, z, r, u, w, t, hr, ht); + } + if (data_spec_int == DATA_SPEC_INJECTION) { + injection(x, z, r, u, w, t, hr, ht); + } + + // Store into the fluid state array + state(ID_DENS, k, i) += r * qweights(ii) * qweights(kk); + state(ID_UMOM, k, i) += (r + hr) * u * qweights(ii) * qweights(kk); + state(ID_WMOM, k, i) += (r + hr) * w * qweights(ii) * qweights(kk); + state(ID_RHOT, k, i) += + ((r + hr) * (t + ht) - hr * ht) * qweights(ii) * qweights(kk); + } + } + }); + + real1d hy_dens_cell("hy_dens_cell ", nz + 2 * hs); + real1d hy_dens_theta_cell("hy_dens_theta_cell", nz + 2 * hs); + real1d hy_dens_int("hy_dens_int ", nz + 1); + real1d hy_dens_theta_int("hy_dens_theta_int ", nz + 1); + real1d hy_pressure_int("hy_pressure_int ", nz + 1); + + // Compute the hydrostatic background state over vertical cell averages + // for (int k=0; k(nz,nx) , YAKL_LAMBDA (int k, int i) { - dens (k,i) = state(ID_DENS,hs+k,hs+i); - uwnd (k,i) = state(ID_UMOM,hs+k,hs+i) / ( hy_dens_cell(hs+k) + 
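Editor's note: the initialization above turns point samples of the initial condition into cell averages with three-point Gauss-Legendre quadrature on the unit interval. The sketch below uses the same weights as the code; the third quadrature point (not visible in this hunk) is filled in from the standard Gauss-Legendre value, and the integrand and cell width are arbitrary.

#include <cstdio>

int main() {
  double qpoints[3] = {0.112701665379258311482073460022,
                       0.500000000000000000000000000000,
                       0.887298334620741688517926539980};
  double qweights[3] = {0.277777777777777777777777777779,
                        0.444444444444444444444444444440,
                        0.277777777777777777777777777779};
  double dx = 100., x0 = 0.; // a hypothetical cell [0, 100]
  double avg = 0.;
  for (int ii = 0; ii < 3; ii++) {
    double x = x0 + qpoints[ii] * dx; // quadrature point inside the cell
    double f = x * x;                 // any smooth integrand; exact up to degree 5
    avg += f * qweights[ii];          // weights sum to 1, so this is a cell average
  }
  printf("average of x^2 over [0,100] = %f (exact 3333.33...)\n", avg);
}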
state(ID_DENS,hs+k,hs+i) ); - wwnd (k,i) = state(ID_WMOM,hs+k,hs+i) / ( hy_dens_cell(hs+k) + state(ID_DENS,hs+k,hs+i) ); - theta(k,i) = ( state(ID_RHOT,hs+k,hs+i) + hy_dens_theta_cell(hs+k) ) / ( hy_dens_cell(hs+k) + state(ID_DENS,hs+k,hs+i) ) - hy_dens_theta_cell(hs+k) / hy_dens_cell(hs+k); - }); + // Store perturbed values in the temp arrays for output + // for (k=0; k(nz, nx), YAKL_LAMBDA(int k, int i) { + dens(k, i) = state(ID_DENS, hs + k, hs + i); + uwnd(k, i) = state(ID_UMOM, hs + k, hs + i) / + (hy_dens_cell(hs + k) + state(ID_DENS, hs + k, hs + i)); + wwnd(k, i) = state(ID_WMOM, hs + k, hs + i) / + (hy_dens_cell(hs + k) + state(ID_DENS, hs + k, hs + i)); + theta(k, i) = + (state(ID_RHOT, hs + k, hs + i) + hy_dens_theta_cell(hs + k)) / + (hy_dens_cell(hs + k) + state(ID_DENS, hs + k, hs + i)) - + hy_dens_theta_cell(hs + k) / hy_dens_cell(hs + k); + }); yakl::fence(); - //Write the grid data to file with all the processes writing collectively - st3[0] = num_out; st3[1] = k_beg; st3[2] = i_beg; - ct3[0] = 1 ; ct3[1] = nz ; ct3[2] = nx ; - ncwrap( ncmpi_put_vara_double_all( ncid , dens_varid , st3 , ct3 , dens .createHostCopy().data() ) , __LINE__ ); - ncwrap( ncmpi_put_vara_double_all( ncid , uwnd_varid , st3 , ct3 , uwnd .createHostCopy().data() ) , __LINE__ ); - ncwrap( ncmpi_put_vara_double_all( ncid , wwnd_varid , st3 , ct3 , wwnd .createHostCopy().data() ) , __LINE__ ); - ncwrap( ncmpi_put_vara_double_all( ncid , theta_varid , st3 , ct3 , theta.createHostCopy().data() ) , __LINE__ ); - - //Only the main process needs to write the elapsed time - //Begin "independent" write mode - ncwrap( ncmpi_begin_indep_data(ncid) , __LINE__ ); - //write elapsed time to file + // Write the grid data to file with all the processes writing collectively + st3[0] = num_out; + st3[1] = k_beg; + st3[2] = i_beg; + ct3[0] = 1; + ct3[1] = nz; + ct3[2] = nx; + ncwrap(ncmpi_put_vara_double_all(ncid, dens_varid, st3, ct3, + dens.createHostCopy().data()), + __LINE__); + ncwrap(ncmpi_put_vara_double_all(ncid, uwnd_varid, st3, ct3, + uwnd.createHostCopy().data()), + __LINE__); + ncwrap(ncmpi_put_vara_double_all(ncid, wwnd_varid, st3, ct3, + wwnd.createHostCopy().data()), + __LINE__); + ncwrap(ncmpi_put_vara_double_all(ncid, theta_varid, st3, ct3, + theta.createHostCopy().data()), + __LINE__); + + // Only the main process needs to write the elapsed time + // Begin "independent" write mode + ncwrap(ncmpi_begin_indep_data(ncid), __LINE__); + // write elapsed time to file if (mainproc) { st1[0] = num_out; ct1[0] = 1; double etimearr[1]; - etimearr[0] = etime; ncwrap( ncmpi_put_vara_double( ncid , t_varid , st1 , ct1 , etimearr ) , __LINE__ ); + etimearr[0] = etime; + ncwrap(ncmpi_put_vara_double(ncid, t_varid, st1, ct1, etimearr), __LINE__); } - //End "independent" write mode - ncwrap( ncmpi_end_indep_data(ncid) , __LINE__ ); + // End "independent" write mode + ncwrap(ncmpi_end_indep_data(ncid), __LINE__); - //Close the file - ncwrap( ncmpi_close(ncid) , __LINE__ ); + // Close the file + ncwrap(ncmpi_close(ncid), __LINE__); - //Increment the number of outputs + // Increment the number of outputs num_out = num_out + 1; } - -//Error reporting routine for the PNetCDF I/O -void ncwrap( int ierr , int line ) { +// Error reporting routine for the PNetCDF I/O +void ncwrap(int ierr, int line) { if (ierr != NC_NOERR) { printf("NetCDF Error at line: %d\n", line); - printf("%s\n",ncmpi_strerror(ierr)); + printf("%s\n", ncmpi_strerror(ierr)); exit(-1); } } +void finalize() {} -void finalize() { -} - - -//Compute reduced 
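Editor's note: output() above uses PnetCDF collective writes, where each rank supplies a start/count pair (st3/ct3) describing its slab of the global array, plus an ncwrap() helper for error checking. Below is a deliberately small sketch of the same pattern for a 1-D field; the file name, variable name, and sizes are hypothetical, and the define-mode calls (ncmpi_create/def_dim/def_var/enddef) go beyond what this hunk shows, so treat them as an assumption about the standard PnetCDF API rather than project code.

#include <mpi.h>
#include <pnetcdf.h>
#include <cstdio>
#include <cstdlib>
#include <vector>

// Error wrapper in the same spirit as ncwrap() above
void check(int ierr, int line) {
  if (ierr != NC_NOERR) {
    printf("PnetCDF error at line %d: %s\n", line, ncmpi_strerror(ierr));
    exit(-1);
  }
}

int main(int argc, char **argv) {
  MPI_Init(&argc, &argv);
  int rank, nranks;
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  MPI_Comm_size(MPI_COMM_WORLD, &nranks);

  const MPI_Offset nx = 4, nx_glob = nx * nranks;
  int ncid, dimid, varid;
  check(ncmpi_create(MPI_COMM_WORLD, "pnetcdf_demo.nc", NC_CLOBBER,
                     MPI_INFO_NULL, &ncid), __LINE__);
  check(ncmpi_def_dim(ncid, "x", nx_glob, &dimid), __LINE__);
  check(ncmpi_def_var(ncid, "dens", NC_DOUBLE, 1, &dimid, &varid), __LINE__);
  check(ncmpi_enddef(ncid), __LINE__);

  std::vector<double> local(nx, (double)rank);       // this rank's slab
  MPI_Offset start[1] = {rank * nx}, count[1] = {nx}; // like st3/ct3 above
  check(ncmpi_put_vara_double_all(ncid, varid, start, count, local.data()),
        __LINE__);
  check(ncmpi_close(ncid), __LINE__);
  MPI_Finalize();
}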
quantities for error checking without resorting to the "ncdiff" tool -void reductions( realConst3d state, double &mass , double &te , Fixed_data const &fixed_data ) { - auto &nx = fixed_data.nx ; - auto &nz = fixed_data.nz ; - auto &hy_dens_cell = fixed_data.hy_dens_cell ; +// Compute reduced quantities for error checking without resorting to the +// "ncdiff" tool +void reductions(realConst3d state, double &mass, double &te, + Fixed_data const &fixed_data) { + auto &nx = fixed_data.nx; + auto &nz = fixed_data.nz; + auto &hy_dens_cell = fixed_data.hy_dens_cell; auto &hy_dens_theta_cell = fixed_data.hy_dens_theta_cell; - doub2d mass2d("mass2d",nz,nx); - doub2d te2d ("te2d ",nz,nx); + doub2d mass2d("mass2d", nz, nx); + doub2d te2d("te2d ", nz, nx); // for (k=0; k(nz,nx) , YAKL_LAMBDA (int k, int i) { - double r = state(ID_DENS,hs+k,hs+i) + hy_dens_cell(hs+k); // Density - double u = state(ID_UMOM,hs+k,hs+i) / r; // U-wind - double w = state(ID_WMOM,hs+k,hs+i) / r; // W-wind - double th = ( state(ID_RHOT,hs+k,hs+i) + hy_dens_theta_cell(hs+k) ) / r; // Potential Temperature (theta) - double p = C0*pow(r*th,gamm); // Pressure - double t = th / pow(p0/p,rd/cp); // Temperature - double ke = r*(u*u+w*w); // Kinetic Energy - double ie = r*cv*t; // Internal Energy - mass2d(k,i) = r *dx*dz; // Accumulate domain mass - te2d (k,i) = (ke + ie)*dx*dz; // Accumulate domain total energy - }); - mass = yakl::intrinsics::sum( mass2d ); - te = yakl::intrinsics::sum( te2d ); + parallel_for( + SimpleBounds<2>(nz, nx), YAKL_LAMBDA(int k, int i) { + double r = + state(ID_DENS, hs + k, hs + i) + hy_dens_cell(hs + k); // Density + double u = state(ID_UMOM, hs + k, hs + i) / r; // U-wind + double w = state(ID_WMOM, hs + k, hs + i) / r; // W-wind + double th = + (state(ID_RHOT, hs + k, hs + i) + hy_dens_theta_cell(hs + k)) / + r; // Potential Temperature (theta) + double p = C0 * pow(r * th, gamm); // Pressure + double t = th / pow(p0 / p, rd / cp); // Temperature + double ke = r * (u * u + w * w); // Kinetic Energy + double ie = r * cv * t; // Internal Energy + mass2d(k, i) = r * dx * dz; // Accumulate domain mass + te2d(k, i) = (ke + ie) * dx * dz; // Accumulate domain total energy + }); + mass = yakl::intrinsics::sum(mass2d); + te = yakl::intrinsics::sum(te2d); double glob[2], loc[2]; loc[0] = mass; loc[1] = te; - int ierr = MPI_Allreduce(loc,glob,2,MPI_DOUBLE,MPI_SUM,MPI_COMM_WORLD); + int ierr = MPI_Allreduce(loc, glob, 2, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); mass = glob[0]; - te = glob[1]; + te = glob[1]; } - - diff --git a/cpp_yakl/miniWeather_mpi.cpp b/cpp_yakl/miniWeather_mpi.cpp index 4d455f5c..51663f0c 100644 --- a/cpp_yakl/miniWeather_mpi.cpp +++ b/cpp_yakl/miniWeather_mpi.cpp @@ -2,137 +2,167 @@ ////////////////////////////////////////////////////////////////////////////////////////// // miniWeather // Author: Matt Norman , Oak Ridge National Laboratory -// This code simulates dry, stratified, compressible, non-hydrostatic fluid flows -// For documentation, please see the attached documentation in the "documentation" folder +// This code simulates dry, stratified, compressible, non-hydrostatic fluid +// flows For documentation, please see the attached documentation in the +// "documentation" folder // ////////////////////////////////////////////////////////////////////////////////////////// -#include -#include -#include -#include #include "const.h" #include "pnetcdf.h" #include +#include +#include +#include +#include -// We're going to define all arrays on the host because this doesn't use parallel_for 
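Editor's note: reductions() above sums mass and total energy locally and then combines both scalars across ranks in a single MPI_Allreduce, exactly as in this minimal runnable sketch (the local values here are placeholders).

#include <mpi.h>
#include <cstdio>

int main(int argc, char **argv) {
  MPI_Init(&argc, &argv);
  int rank;
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  double loc[2] = {1.0 + rank, 10.0 + rank}; // local {mass, total energy}
  double glob[2];
  MPI_Allreduce(loc, glob, 2, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
  if (rank == 0) printf("mass = %le, te = %le\n", glob[0], glob[1]);
  MPI_Finalize();
}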
-typedef yakl::Array real1d; -typedef yakl::Array real2d; -typedef yakl::Array real3d; -typedef yakl::Array doub1d; -typedef yakl::Array doub2d; -typedef yakl::Array doub3d; - -typedef yakl::Array realConst1d; -typedef yakl::Array realConst2d; -typedef yakl::Array realConst3d; -typedef yakl::Array doubConst1d; -typedef yakl::Array doubConst2d; -typedef yakl::Array doubConst3d; +// We're going to define all arrays on the host because this doesn't use +// parallel_for +typedef yakl::Array real1d; +typedef yakl::Array real2d; +typedef yakl::Array real3d; +typedef yakl::Array doub1d; +typedef yakl::Array doub2d; +typedef yakl::Array doub3d; + +typedef yakl::Array realConst1d; +typedef yakl::Array realConst2d; +typedef yakl::Array realConst3d; +typedef yakl::Array doubConst1d; +typedef yakl::Array doubConst2d; +typedef yakl::Array doubConst3d; /////////////////////////////////////////////////////////////////////////////////////// -// Variables that are initialized but remain static over the course of the simulation +// Variables that are initialized but remain static over the course of the +// simulation /////////////////////////////////////////////////////////////////////////////////////// struct Fixed_data { - int nx, nz; //Number of local grid cells in the x- and z- dimensions for this MPI task - int i_beg, k_beg; //beginning index in the x- and z-directions for this MPI task - int nranks, myrank; //Number of MPI ranks and my rank id - int left_rank, right_rank; //MPI Rank IDs that exist to my left and right in the global domain - int mainproc; //Am I the main process (rank == 0)? - realConst1d hy_dens_cell; //hydrostatic density (vert cell avgs). Dimensions: (1-hs:nz+hs) - realConst1d hy_dens_theta_cell; //hydrostatic rho*t (vert cell avgs). Dimensions: (1-hs:nz+hs) - realConst1d hy_dens_int; //hydrostatic density (vert cell interf). Dimensions: (1:nz+1) - realConst1d hy_dens_theta_int; //hydrostatic rho*t (vert cell interf). Dimensions: (1:nz+1) - realConst1d hy_pressure_int; //hydrostatic press (vert cell interf). Dimensions: (1:nz+1) + int nx, nz; // Number of local grid cells in the x- and z- dimensions for this + // MPI task + int i_beg, + k_beg; // beginning index in the x- and z-directions for this MPI task + int nranks, myrank; // Number of MPI ranks and my rank id + int left_rank, right_rank; // MPI Rank IDs that exist to my left and right in + // the global domain + int mainproc; // Am I the main process (rank == 0)? + realConst1d hy_dens_cell; // hydrostatic density (vert cell avgs). Dimensions: + // (1-hs:nz+hs) + realConst1d hy_dens_theta_cell; // hydrostatic rho*t (vert cell avgs). + // Dimensions: (1-hs:nz+hs) + realConst1d hy_dens_int; // hydrostatic density (vert cell interf). + // Dimensions: (1:nz+1) + realConst1d hy_dens_theta_int; // hydrostatic rho*t (vert cell interf). + // Dimensions: (1:nz+1) + realConst1d hy_pressure_int; // hydrostatic press (vert cell interf). 
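Editor's note: the typedefs above wrap C-ordered (row-major, rightmost index fastest) 3-D arrays. The stand-in below is plain C++, not the YAKL API; it only illustrates the (ll,k,i) -> flat-offset mapping that a styleC array provides, with arbitrary sizes.

#include <cstdio>
#include <vector>

int main() {
  const int NUM_VARS = 4, nz = 3, nx = 5;
  std::vector<double> state(NUM_VARS * nz * nx, 0.);
  // C ordering: ll slowest, i fastest
  auto at = [&](int ll, int k, int i) -> double & {
    return state[(ll * nz + k) * nx + i];
  };
  at(1, 2, 3) = 42.;
  printf("state(1,2,3) = %g\n", at(1, 2, 3));
}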
+ // Dimensions: (1:nz+1) }; -//Declaring the functions defined after "main" -void init ( real3d &state , real &dt , Fixed_data &fixed_data ); -void finalize ( ); -void injection ( real x , real z , real &r , real &u , real &w , real &t , real &hr , real &ht ); -void density_current ( real x , real z , real &r , real &u , real &w , real &t , real &hr , real &ht ); -void gravity_waves ( real x , real z , real &r , real &u , real &w , real &t , real &hr , real &ht ); -void thermal ( real x , real z , real &r , real &u , real &w , real &t , real &hr , real &ht ); -void collision ( real x , real z , real &r , real &u , real &w , real &t , real &hr , real &ht ); -void hydro_const_theta ( real z , real &r , real &t ); -void hydro_const_bvfreq ( real z , real bv_freq0 , real &r , real &t ); -real sample_ellipse_cosine( real x , real z , real amp , real x0 , real z0 , real xrad , real zrad ); -void output ( realConst3d state , real etime , int &num_out , Fixed_data const &fixed_data ); -void ncwrap ( int ierr , int line ); -void perform_timestep ( real3d const &state , real dt , int &direction_switch , Fixed_data const &fixed_data ); -void semi_discrete_step ( realConst3d state_init , real3d const &state_forcing , real3d const &state_out , real dt , int dir , Fixed_data const &fixed_data ); -void compute_tendencies_x ( realConst3d state , real3d const &tend , real dt , Fixed_data const &fixed_data ); -void compute_tendencies_z ( realConst3d state , real3d const &tend , real dt , Fixed_data const &fixed_data ); -void set_halo_values_x ( real3d const &state , Fixed_data const &fixed_data ); -void set_halo_values_z ( real3d const &state , Fixed_data const &fixed_data ); -void reductions ( realConst3d state , double &mass , double &te , Fixed_data const &fixed_data ); - +// Declaring the functions defined after "main" +void init(real3d &state, real &dt, Fixed_data &fixed_data); +void finalize(); +void injection(real x, real z, real &r, real &u, real &w, real &t, real &hr, + real &ht); +void density_current(real x, real z, real &r, real &u, real &w, real &t, + real &hr, real &ht); +void gravity_waves(real x, real z, real &r, real &u, real &w, real &t, real &hr, + real &ht); +void thermal(real x, real z, real &r, real &u, real &w, real &t, real &hr, + real &ht); +void collision(real x, real z, real &r, real &u, real &w, real &t, real &hr, + real &ht); +void hydro_const_theta(real z, real &r, real &t); +void hydro_const_bvfreq(real z, real bv_freq0, real &r, real &t); +real sample_ellipse_cosine(real x, real z, real amp, real x0, real z0, + real xrad, real zrad); +void output(realConst3d state, real etime, int &num_out, + Fixed_data const &fixed_data); +void ncwrap(int ierr, int line); +void perform_timestep(real3d const &state, real dt, int &direction_switch, + Fixed_data const &fixed_data); +void semi_discrete_step(realConst3d state_init, real3d const &state_forcing, + real3d const &state_out, real dt, int dir, + Fixed_data const &fixed_data); +void compute_tendencies_x(realConst3d state, real3d const &tend, real dt, + Fixed_data const &fixed_data); +void compute_tendencies_z(realConst3d state, real3d const &tend, real dt, + Fixed_data const &fixed_data); +void set_halo_values_x(real3d const &state, Fixed_data const &fixed_data); +void set_halo_values_z(real3d const &state, Fixed_data const &fixed_data); +void reductions(realConst3d state, double &mass, double &te, + Fixed_data const &fixed_data); /////////////////////////////////////////////////////////////////////////////////////// // THE MAIN 
PROGRAM STARTS HERE /////////////////////////////////////////////////////////////////////////////////////// int main(int argc, char **argv) { - MPI_Init(&argc,&argv); + MPI_Init(&argc, &argv); yakl::init(); { Fixed_data fixed_data; real3d state; - real dt; //Model time step (seconds) + real dt; // Model time step (seconds) // init allocates state - init( state , dt , fixed_data ); + init(state, dt, fixed_data); auto &mainproc = fixed_data.mainproc; - //Initial reductions for mass, kinetic energy, and total energy + // Initial reductions for mass, kinetic energy, and total energy double mass0, te0; - reductions(state,mass0,te0,fixed_data); + reductions(state, mass0, te0, fixed_data); - int num_out = 0; //The number of outputs performed so far - real output_counter = 0; //Helps determine when it's time to do output + int num_out = 0; // The number of outputs performed so far + real output_counter = 0; // Helps determine when it's time to do output real etime = 0; - //Output the initial state + // Output the initial state if (output_freq >= 0) { - output(state,etime,num_out,fixed_data); + output(state, etime, num_out, fixed_data); } - int direction_switch = 1; // Tells dimensionally split which order to take x,z solves + int direction_switch = + 1; // Tells dimensionally split which order to take x,z solves //////////////////////////////////////////////////// // MAIN TIME STEP LOOP //////////////////////////////////////////////////// auto t1 = std::chrono::steady_clock::now(); while (etime < sim_time) { - //If the time step leads to exceeding the simulation time, shorten it for the last step - if (etime + dt > sim_time) { dt = sim_time - etime; } - //Perform a single time step - perform_timestep(state,dt,direction_switch,fixed_data); - //Inform the user - #ifndef NO_INFORM - if (mainproc) { printf( "Elapsed Time: %lf / %lf\n", etime , sim_time ); } - #endif - //Update the elapsed time and output counter + // If the time step leads to exceeding the simulation time, shorten it for + // the last step + if (etime + dt > sim_time) { + dt = sim_time - etime; + } + // Perform a single time step + perform_timestep(state, dt, direction_switch, fixed_data); +// Inform the user +#ifndef NO_INFORM + if (mainproc) { + printf("Elapsed Time: %lf / %lf\n", etime, sim_time); + } +#endif + // Update the elapsed time and output counter etime = etime + dt; output_counter = output_counter + dt; - //If it's time for output, reset the counter, and do output + // If it's time for output, reset the counter, and do output if (output_freq >= 0 && output_counter >= output_freq) { output_counter = output_counter - output_freq; - output(state,etime,num_out,fixed_data); + output(state, etime, num_out, fixed_data); } } auto t2 = std::chrono::steady_clock::now(); if (mainproc) { - std::cout << "CPU Time: " << std::chrono::duration(t2-t1).count() << " sec\n"; + std::cout << "CPU Time: " + << std::chrono::duration(t2 - t1).count() << " sec\n"; } - //Final reductions for mass, kinetic energy, and total energy + // Final reductions for mass, kinetic energy, and total energy double mass, te; - reductions(state,mass,te,fixed_data); + reductions(state, mass, te, fixed_data); if (mainproc) { - printf( "d_mass: %le\n" , (mass - mass0)/mass0 ); - printf( "d_te: %le\n" , (te - te0 )/te0 ); + printf("d_mass: %le\n", (mass - mass0) / mass0); + printf("d_te: %le\n", (te - te0) / te0); } finalize(); @@ -141,241 +171,261 @@ int main(int argc, char **argv) { MPI_Finalize(); } +// Performs a single dimensionally split time step using a simple 
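Editor's note: the driver loop above clamps the final step so the run lands exactly on sim_time and uses a running counter to trigger output every output_freq seconds of model time. A stripped-down plain-C++ sketch of that control flow follows; the times and step size are arbitrary.

#include <cstdio>

int main() {
  double sim_time = 10., output_freq = 3., dt0 = 0.7;
  double etime = 0., output_counter = 0.;
  while (etime < sim_time) {
    double dt = dt0;
    if (etime + dt > sim_time) dt = sim_time - etime; // shorten the last step
    etime += dt;                                      // (time step would go here)
    output_counter += dt;
    if (output_counter >= output_freq) {
      output_counter -= output_freq;
      printf("output at t = %f\n", etime);
    }
  }
  printf("finished at t = %f\n", etime);
}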
low-storage +// three-stage Runge-Kutta time integrator The dimensional splitting is a +// second-order-accurate alternating Strang splitting in which the order of +// directions is alternated each time step. The Runge-Kutta method used here is +// defined as follows: +// q* = q_n + dt/3 * rhs(q_n) +// q** = q_n + dt/2 * rhs(q* ) +// q_n+1 = q_n + dt/1 * rhs(q**) +void perform_timestep(real3d const &state, real dt, int &direction_switch, + Fixed_data const &fixed_data) { + auto &nx = fixed_data.nx; + auto &nz = fixed_data.nz; + + real3d state_tmp("state_tmp", NUM_VARS, nz + 2 * hs, nx + 2 * hs); -//Performs a single dimensionally split time step using a simple low-storage three-stage Runge-Kutta time integrator -//The dimensional splitting is a second-order-accurate alternating Strang splitting in which the -//order of directions is alternated each time step. -//The Runge-Kutta method used here is defined as follows: -// q* = q_n + dt/3 * rhs(q_n) -// q** = q_n + dt/2 * rhs(q* ) -// q_n+1 = q_n + dt/1 * rhs(q**) -void perform_timestep( real3d const &state , real dt , int &direction_switch , Fixed_data const &fixed_data ) { - auto &nx = fixed_data.nx ; - auto &nz = fixed_data.nz ; - - real3d state_tmp("state_tmp",NUM_VARS,nz+2*hs,nx+2*hs); - if (direction_switch) { - //x-direction first - semi_discrete_step( state , state , state_tmp , dt / 3 , DIR_X , fixed_data ); - semi_discrete_step( state , state_tmp , state_tmp , dt / 2 , DIR_X , fixed_data ); - semi_discrete_step( state , state_tmp , state , dt / 1 , DIR_X , fixed_data ); - //z-direction second - semi_discrete_step( state , state , state_tmp , dt / 3 , DIR_Z , fixed_data ); - semi_discrete_step( state , state_tmp , state_tmp , dt / 2 , DIR_Z , fixed_data ); - semi_discrete_step( state , state_tmp , state , dt / 1 , DIR_Z , fixed_data ); + // x-direction first + semi_discrete_step(state, state, state_tmp, dt / 3, DIR_X, fixed_data); + semi_discrete_step(state, state_tmp, state_tmp, dt / 2, DIR_X, fixed_data); + semi_discrete_step(state, state_tmp, state, dt / 1, DIR_X, fixed_data); + // z-direction second + semi_discrete_step(state, state, state_tmp, dt / 3, DIR_Z, fixed_data); + semi_discrete_step(state, state_tmp, state_tmp, dt / 2, DIR_Z, fixed_data); + semi_discrete_step(state, state_tmp, state, dt / 1, DIR_Z, fixed_data); + } else { + // z-direction second + semi_discrete_step(state, state, state_tmp, dt / 3, DIR_Z, fixed_data); + semi_discrete_step(state, state_tmp, state_tmp, dt / 2, DIR_Z, fixed_data); + semi_discrete_step(state, state_tmp, state, dt / 1, DIR_Z, fixed_data); + // x-direction first + semi_discrete_step(state, state, state_tmp, dt / 3, DIR_X, fixed_data); + semi_discrete_step(state, state_tmp, state_tmp, dt / 2, DIR_X, fixed_data); + semi_discrete_step(state, state_tmp, state, dt / 1, DIR_X, fixed_data); + } + if (direction_switch) { + direction_switch = 0; } else { - //z-direction second - semi_discrete_step( state , state , state_tmp , dt / 3 , DIR_Z , fixed_data ); - semi_discrete_step( state , state_tmp , state_tmp , dt / 2 , DIR_Z , fixed_data ); - semi_discrete_step( state , state_tmp , state , dt / 1 , DIR_Z , fixed_data ); - //x-direction first - semi_discrete_step( state , state , state_tmp , dt / 3 , DIR_X , fixed_data ); - semi_discrete_step( state , state_tmp , state_tmp , dt / 2 , DIR_X , fixed_data ); - semi_discrete_step( state , state_tmp , state , dt / 1 , DIR_X , fixed_data ); + direction_switch = 1; } - if (direction_switch) { direction_switch = 0; } else { direction_switch = 1; } } - 
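Editor's note: the three-stage low-storage update documented above applies the same dt/3, dt/2, dt/1 sequence in each direction of the Strang splitting. The toy ODE sketch below shows only the time-integrator pattern, with a made-up right-hand side.

#include <cstdio>

double rhs(double q) { return -q; } // toy right-hand side, dq/dt = -q

int main() {
  double q = 1.0, dt = 0.1;
  for (int n = 0; n < 10; n++) {
    double q1 = q + dt / 3 * rhs(q);  // q*
    double q2 = q + dt / 2 * rhs(q1); // q**
    q = q + dt / 1 * rhs(q2);         // q_{n+1}
  }
  printf("q(1) ~ %f (exact exp(-1) = 0.367879)\n", q);
}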
-//Perform a single semi-discretized step in time with the form: -//state_out = state_init + dt * rhs(state_forcing) -//Meaning the step starts from state_init, computes the rhs using state_forcing, and stores the result in state_out -void semi_discrete_step( realConst3d state_init , real3d const &state_forcing , real3d const &state_out , real dt , int dir , Fixed_data const &fixed_data ) { - auto &nx = fixed_data.nx ; - auto &nz = fixed_data.nz ; - auto &i_beg = fixed_data.i_beg ; - auto &k_beg = fixed_data.k_beg ; - auto &hy_dens_cell = fixed_data.hy_dens_cell ; - - real3d tend("tend",NUM_VARS,nz,nx); - - if (dir == DIR_X) { - //Set the halo values for this MPI task's fluid state in the x-direction +// Perform a single semi-discretized step in time with the form: +// state_out = state_init + dt * rhs(state_forcing) +// Meaning the step starts from state_init, computes the rhs using +// state_forcing, and stores the result in state_out +void semi_discrete_step(realConst3d state_init, real3d const &state_forcing, + real3d const &state_out, real dt, int dir, + Fixed_data const &fixed_data) { + auto &nx = fixed_data.nx; + auto &nz = fixed_data.nz; + auto &i_beg = fixed_data.i_beg; + auto &k_beg = fixed_data.k_beg; + auto &hy_dens_cell = fixed_data.hy_dens_cell; + + real3d tend("tend", NUM_VARS, nz, nx); + + if (dir == DIR_X) { + // Set the halo values for this MPI task's fluid state in the x-direction yakl::timer_start("halo x"); - set_halo_values_x(state_forcing,fixed_data); + set_halo_values_x(state_forcing, fixed_data); yakl::timer_stop("halo x"); - //Compute the time tendencies for the fluid state in the x-direction + // Compute the time tendencies for the fluid state in the x-direction yakl::timer_start("tendencies x"); - compute_tendencies_x(state_forcing,tend,dt,fixed_data); + compute_tendencies_x(state_forcing, tend, dt, fixed_data); yakl::timer_stop("tendencies x"); } else if (dir == DIR_Z) { - //Set the halo values for this MPI task's fluid state in the z-direction + // Set the halo values for this MPI task's fluid state in the z-direction yakl::timer_start("halo z"); - set_halo_values_z(state_forcing,fixed_data); + set_halo_values_z(state_forcing, fixed_data); yakl::timer_stop("halo z"); - //Compute the time tendencies for the fluid state in the z-direction + // Compute the time tendencies for the fluid state in the z-direction yakl::timer_start("tendencies z"); - compute_tendencies_z(state_forcing,tend,dt,fixed_data); + compute_tendencies_z(state_forcing, tend, dt, fixed_data); yakl::timer_stop("tendencies z"); } ///////////////////////////////////////////////// // TODO: MAKE THESE 3 LOOPS A PARALLEL_FOR ///////////////////////////////////////////////// - //Apply the tendencies to the fluid state + // Apply the tendencies to the fluid state yakl::timer_start("apply tendencies"); - for (int ll=0; ll stencil; - SArray d3_vals; - SArray vals; - //Use fourth-order interpolation from four cell averages to compute the value at the interface in question - for (int ll=0; ll stencil; + SArray d3_vals; + SArray vals; + // Use fourth-order interpolation from four cell averages to compute the + // value at the interface in question + for (int ll = 0; ll < NUM_VARS; ll++) { + for (int s = 0; s < sten_size; s++) { + stencil(s) = state(ll, hs + k, i + s); } - //Fourth-order-accurate interpolation of the state - vals(ll) = -stencil(0)/12 + 7*stencil(1)/12 + 7*stencil(2)/12 - stencil(3)/12; - //First-order-accurate interpolation of the third spatial derivative of the state (for artificial 
viscosity) - d3_vals(ll) = -stencil(0) + 3*stencil(1) - 3*stencil(2) + stencil(3); + // Fourth-order-accurate interpolation of the state + vals(ll) = -stencil(0) / 12 + 7 * stencil(1) / 12 + + 7 * stencil(2) / 12 - stencil(3) / 12; + // First-order-accurate interpolation of the third spatial derivative of + // the state (for artificial viscosity) + d3_vals(ll) = + -stencil(0) + 3 * stencil(1) - 3 * stencil(2) + stencil(3); } - //Compute density, u-wind, w-wind, potential temperature, and pressure (r,u,w,t,p respectively) - real r = vals(ID_DENS) + hy_dens_cell(hs+k); + // Compute density, u-wind, w-wind, potential temperature, and pressure + // (r,u,w,t,p respectively) + real r = vals(ID_DENS) + hy_dens_cell(hs + k); real u = vals(ID_UMOM) / r; real w = vals(ID_WMOM) / r; - real t = ( vals(ID_RHOT) + hy_dens_theta_cell(hs+k) ) / r; - real p = C0*pow((r*t),gamm); - - //Compute the flux vector - flux(ID_DENS,k,i) = r*u - hv_coef*d3_vals(ID_DENS); - flux(ID_UMOM,k,i) = r*u*u+p - hv_coef*d3_vals(ID_UMOM); - flux(ID_WMOM,k,i) = r*u*w - hv_coef*d3_vals(ID_WMOM); - flux(ID_RHOT,k,i) = r*u*t - hv_coef*d3_vals(ID_RHOT); + real t = (vals(ID_RHOT) + hy_dens_theta_cell(hs + k)) / r; + real p = C0 * pow((r * t), gamm); + + // Compute the flux vector + flux(ID_DENS, k, i) = r * u - hv_coef * d3_vals(ID_DENS); + flux(ID_UMOM, k, i) = r * u * u + p - hv_coef * d3_vals(ID_UMOM); + flux(ID_WMOM, k, i) = r * u * w - hv_coef * d3_vals(ID_WMOM); + flux(ID_RHOT, k, i) = r * u * t - hv_coef * d3_vals(ID_RHOT); } } ///////////////////////////////////////////////// // TODO: MAKE THESE 3 LOOPS A PARALLEL_FOR ///////////////////////////////////////////////// - //Use the fluxes to compute tendencies for each cell - for (int ll=0; ll stencil; - SArray d3_vals; - SArray vals; - //Use fourth-order interpolation from four cell averages to compute the value at the interface in question - for (int ll=0; ll stencil; + SArray d3_vals; + SArray vals; + // Use fourth-order interpolation from four cell averages to compute the + // value at the interface in question + for (int ll = 0; ll < NUM_VARS; ll++) { + for (int s = 0; s < sten_size; s++) { + stencil(s) = state(ll, k + s, hs + i); } - //Fourth-order-accurate interpolation of the state - vals(ll) = -stencil(0)/12 + 7*stencil(1)/12 + 7*stencil(2)/12 - stencil(3)/12; - //First-order-accurate interpolation of the third spatial derivative of the state - d3_vals(ll) = -stencil(0) + 3*stencil(1) - 3*stencil(2) + stencil(3); + // Fourth-order-accurate interpolation of the state + vals(ll) = -stencil(0) / 12 + 7 * stencil(1) / 12 + + 7 * stencil(2) / 12 - stencil(3) / 12; + // First-order-accurate interpolation of the third spatial derivative of + // the state + d3_vals(ll) = + -stencil(0) + 3 * stencil(1) - 3 * stencil(2) + stencil(3); } - //Compute density, u-wind, w-wind, potential temperature, and pressure (r,u,w,t,p respectively) + // Compute density, u-wind, w-wind, potential temperature, and pressure + // (r,u,w,t,p respectively) real r = vals(ID_DENS) + hy_dens_int(k); real u = vals(ID_UMOM) / r; real w = vals(ID_WMOM) / r; - real t = ( vals(ID_RHOT) + hy_dens_theta_int(k) ) / r; - real p = C0*pow((r*t),gamm) - hy_pressure_int(k); + real t = (vals(ID_RHOT) + hy_dens_theta_int(k)) / r; + real p = C0 * pow((r * t), gamm) - hy_pressure_int(k); if (k == 0 || k == nz) { - w = 0; + w = 0; d3_vals(ID_DENS) = 0; } - //Compute the flux vector with hyperviscosity - flux(ID_DENS,k,i) = r*w - hv_coef*d3_vals(ID_DENS); - flux(ID_UMOM,k,i) = r*w*u - hv_coef*d3_vals(ID_UMOM); - 
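Editor's note: the serial hunks above carry "TODO: MAKE THESE 3 LOOPS A PARALLEL_FOR" markers. The essence of that transformation is collapsing the (ll,k,i) nest into one flat iteration space, which is what a SimpleBounds<3> launch iterates over. The plain-C++ sketch below shows only that index arithmetic with a placeholder body and arbitrary sizes; it is not the YAKL/Kokkos call itself.

#include <cstdio>
#include <vector>

int main() {
  const int NUM_VARS = 4, nz = 3, nx = 5;
  std::vector<double> tend(NUM_VARS * nz * nx);
  for (int idx = 0; idx < NUM_VARS * nz * nx; idx++) {
    int ll = idx / (nz * nx);            // slowest-varying index
    int k = (idx / nx) % nz;
    int i = idx % nx;                    // fastest-varying index
    tend[idx] = ll + 0.1 * k + 0.01 * i; // placeholder body
  }
  printf("tend[last] = %g\n", tend[NUM_VARS * nz * nx - 1]);
}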
flux(ID_WMOM,k,i) = r*w*w+p - hv_coef*d3_vals(ID_WMOM); - flux(ID_RHOT,k,i) = r*w*t - hv_coef*d3_vals(ID_RHOT); + // Compute the flux vector with hyperviscosity + flux(ID_DENS, k, i) = r * w - hv_coef * d3_vals(ID_DENS); + flux(ID_UMOM, k, i) = r * w * u - hv_coef * d3_vals(ID_UMOM); + flux(ID_WMOM, k, i) = r * w * w + p - hv_coef * d3_vals(ID_WMOM); + flux(ID_RHOT, k, i) = r * w * t - hv_coef * d3_vals(ID_RHOT); } } - //Use the fluxes to compute tendencies for each cell + // Use the fluxes to compute tendencies for each cell ///////////////////////////////////////////////// // TODO: MAKE THESE 3 LOOPS A PARALLEL_FOR ///////////////////////////////////////////////// - for (int ll=0; ll qpoints; - SArray qweights; + SArray qpoints; + SArray qweights; qpoints(0) = 0.112701665379258311482073460022; qpoints(1) = 0.500000000000000000000000000000; @@ -575,338 +640,407 @@ void init( real3d &state , real &dt , Fixed_data &fixed_data ) { ///////////////////////////////////////////////// // TODO: MAKE THESE 2 LOOPS A PARALLEL_FOR ///////////////////////////////////////////////// - for (int k=0; k , Oak Ridge National Laboratory -// This code simulates dry, stratified, compressible, non-hydrostatic fluid flows -// For documentation, please see the attached documentation in the "documentation" folder +// This code simulates dry, stratified, compressible, non-hydrostatic fluid +// flows For documentation, please see the attached documentation in the +// "documentation" folder // ////////////////////////////////////////////////////////////////////////////////////////// -#include -#include -#include -#include #include "const.h" #include "pnetcdf.h" -#include +#include #include +#include +#include +#include +#include -// We're going to define all arrays on the host because this doesn't use parallel_for -typedef yakl::Array real1d; -typedef yakl::Array real2d; -typedef yakl::Array real3d; -typedef yakl::Array doub1d; -typedef yakl::Array doub2d; -typedef yakl::Array doub3d; - -typedef yakl::Array realConst1d; -typedef yakl::Array realConst2d; -typedef yakl::Array realConst3d; -typedef yakl::Array doubConst1d; -typedef yakl::Array doubConst2d; -typedef yakl::Array doubConst3d; - -// Some arrays still need to be on the host, so we will explicitly create Host Array typedefs -typedef yakl::Array real1dHost; -typedef yakl::Array real2dHost; -typedef yakl::Array real3dHost; -typedef yakl::Array doub1dHost; -typedef yakl::Array doub2dHost; -typedef yakl::Array doub3dHost; +// We're going to define all arrays on the host because this doesn't use +// parallel_for +typedef yakl::Array real1d; +typedef yakl::Array real2d; +typedef yakl::Array real3d; +typedef yakl::Array doub1d; +typedef yakl::Array doub2d; +typedef yakl::Array doub3d; + +typedef yakl::Array realConst1d; +typedef yakl::Array realConst2d; +typedef yakl::Array realConst3d; +typedef yakl::Array doubConst1d; +typedef yakl::Array doubConst2d; +typedef yakl::Array doubConst3d; + +// Some arrays still need to be on the host, so we will explicitly create Host +// Array typedefs +typedef yakl::Array real1dHost; +typedef yakl::Array real2dHost; +typedef yakl::Array real3dHost; +typedef yakl::Array doub1dHost; +typedef yakl::Array doub2dHost; +typedef yakl::Array doub3dHost; /////////////////////////////////////////////////////////////////////////////////////// -// Variables that are initialized but remain static over the course of the simulation +// Variables that are initialized but remain static over the course of the +// simulation 
/////////////////////////////////////////////////////////////////////////////////////// struct Fixed_data { - int nx, nz; //Number of local grid cells in the x- and z- dimensions for this MPI task - int i_beg, k_beg; //beginning index in the x- and z-directions for this MPI task - int nranks, myrank; //Number of MPI ranks and my rank id - int left_rank, right_rank; //MPI Rank IDs that exist to my left and right in the global domain - int mainproc; //Am I the main process (rank == 0)? - realConst1d hy_dens_cell; //hydrostatic density (vert cell avgs). Dimensions: (1-hs:nz+hs) - realConst1d hy_dens_theta_cell; //hydrostatic rho*t (vert cell avgs). Dimensions: (1-hs:nz+hs) - realConst1d hy_dens_int; //hydrostatic density (vert cell interf). Dimensions: (1:nz+1) - realConst1d hy_dens_theta_int; //hydrostatic rho*t (vert cell interf). Dimensions: (1:nz+1) - realConst1d hy_pressure_int; //hydrostatic press (vert cell interf). Dimensions: (1:nz+1) + int nx, nz; // Number of local grid cells in the x- and z- dimensions for this + // MPI task + int i_beg, + k_beg; // beginning index in the x- and z-directions for this MPI task + int nranks, myrank; // Number of MPI ranks and my rank id + int left_rank, right_rank; // MPI Rank IDs that exist to my left and right in + // the global domain + int mainproc; // Am I the main process (rank == 0)? + realConst1d hy_dens_cell; // hydrostatic density (vert cell avgs). Dimensions: + // (1-hs:nz+hs) + realConst1d hy_dens_theta_cell; // hydrostatic rho*t (vert cell avgs). + // Dimensions: (1-hs:nz+hs) + realConst1d hy_dens_int; // hydrostatic density (vert cell interf). + // Dimensions: (1:nz+1) + realConst1d hy_dens_theta_int; // hydrostatic rho*t (vert cell interf). + // Dimensions: (1:nz+1) + realConst1d hy_pressure_int; // hydrostatic press (vert cell interf). 
+ // Dimensions: (1:nz+1) }; /////////////////////////////////////////////////////////////////////////////////////// // Variables that are dynamics over the course of the simulation /////////////////////////////////////////////////////////////////////////////////////// -//Declaring the functions defined after "main" -void init ( real3d &state , real &dt , Fixed_data &fixed_data ); -void finalize ( ); -void injection ( real x , real z , real &r , real &u , real &w , real &t , real &hr , real &ht ); -void density_current ( real x , real z , real &r , real &u , real &w , real &t , real &hr , real &ht ); -void gravity_waves ( real x , real z , real &r , real &u , real &w , real &t , real &hr , real &ht ); -void thermal ( real x , real z , real &r , real &u , real &w , real &t , real &hr , real &ht ); -void collision ( real x , real z , real &r , real &u , real &w , real &t , real &hr , real &ht ); -void hydro_const_theta ( real z , real &r , real &t ); -void hydro_const_bvfreq ( real z , real bv_freq0 , real &r , real &t ); -real sample_ellipse_cosine( real x , real z , real amp , real x0 , real z0 , real xrad , real zrad ); -void output ( realConst3d state , real etime , int &num_out , Fixed_data const &fixed_data ); -void ncwrap ( int ierr , int line ); -void perform_timestep ( real3d const &state , real dt , int &direction_switch , Fixed_data const &fixed_data ); -void semi_discrete_step ( realConst3d state_init , real3d const &state_forcing , real3d const &state_out , real dt , int dir , Fixed_data const &fixed_data ); -void compute_tendencies_x ( realConst3d state , real3d const &tend , real dt , Fixed_data const &fixed_data ); -void compute_tendencies_z ( realConst3d state , real3d const &tend , real dt , Fixed_data const &fixed_data ); -void set_halo_values_x ( real3d const &state , Fixed_data const &fixed_data ); -void set_halo_values_z ( real3d const &state , Fixed_data const &fixed_data ); -void reductions ( realConst3d state , double &mass , double &te , Fixed_data const &fixed_data ); - +// Declaring the functions defined after "main" +void init(real3d &state, real &dt, Fixed_data &fixed_data); +void finalize(); +void injection(real x, real z, real &r, real &u, real &w, real &t, real &hr, + real &ht); +void density_current(real x, real z, real &r, real &u, real &w, real &t, + real &hr, real &ht); +void gravity_waves(real x, real z, real &r, real &u, real &w, real &t, real &hr, + real &ht); +void thermal(real x, real z, real &r, real &u, real &w, real &t, real &hr, + real &ht); +void collision(real x, real z, real &r, real &u, real &w, real &t, real &hr, + real &ht); +void hydro_const_theta(real z, real &r, real &t); +void hydro_const_bvfreq(real z, real bv_freq0, real &r, real &t); +real sample_ellipse_cosine(real x, real z, real amp, real x0, real z0, + real xrad, real zrad); +void output(realConst3d state, real etime, int &num_out, + Fixed_data const &fixed_data); +void ncwrap(int ierr, int line); +void perform_timestep(real3d const &state, real dt, int &direction_switch, + Fixed_data const &fixed_data); +void semi_discrete_step(realConst3d state_init, real3d const &state_forcing, + real3d const &state_out, real dt, int dir, + Fixed_data const &fixed_data); +void compute_tendencies_x(realConst3d state, real3d const &tend, real dt, + Fixed_data const &fixed_data); +void compute_tendencies_z(realConst3d state, real3d const &tend, real dt, + Fixed_data const &fixed_data); +void set_halo_values_x(real3d const &state, Fixed_data const &fixed_data); +void set_halo_values_z(real3d const 
&state, Fixed_data const &fixed_data); +void reductions(realConst3d state, double &mass, double &te, + Fixed_data const &fixed_data); /////////////////////////////////////////////////////////////////////////////////////// // THE MAIN PROGRAM STARTS HERE /////////////////////////////////////////////////////////////////////////////////////// int main(int argc, char **argv) { - MPI_Init(&argc,&argv); + MPI_Init(&argc, &argv); yakl::init(); { Fixed_data fixed_data; real3d state; - real dt; //Model time step (seconds) + real dt; // Model time step (seconds) // Init allocates the state and hydrostatic arrays hy_* - init( state , dt , fixed_data ); + init(state, dt, fixed_data); auto &mainproc = fixed_data.mainproc; - //Initial reductions for mass, kinetic energy, and total energy + // Initial reductions for mass, kinetic energy, and total energy double mass0, te0; - reductions(state,mass0,te0,fixed_data); + reductions(state, mass0, te0, fixed_data); - int num_out = 0; //The number of outputs performed so far - real output_counter = 0; //Helps determine when it's time to do output + int num_out = 0; // The number of outputs performed so far + real output_counter = 0; // Helps determine when it's time to do output real etime = 0; - //Output the initial state + // Output the initial state if (output_freq >= 0) { - output(state,etime,num_out,fixed_data); + output(state, etime, num_out, fixed_data); } - int direction_switch = 1; // Tells dimensionally split which order to take x,z solves + int direction_switch = + 1; // Tells dimensionally split which order to take x,z solves //////////////////////////////////////////////////// // MAIN TIME STEP LOOP @@ -118,36 +143,42 @@ int main(int argc, char **argv) { Kokkos::fence(); auto t1 = std::chrono::steady_clock::now(); while (etime < sim_time) { - //If the time step leads to exceeding the simulation time, shorten it for the last step - if (etime + dt > sim_time) { dt = sim_time - etime; } - //Perform a single time step - perform_timestep(state,dt,direction_switch,fixed_data); - //Inform the user - #ifndef NO_INFORM - if (mainproc) { printf( "Elapsed Time: %lf / %lf\n", etime , sim_time ); } - #endif - //Update the elapsed time and output counter + // If the time step leads to exceeding the simulation time, shorten it for + // the last step + if (etime + dt > sim_time) { + dt = sim_time - etime; + } + // Perform a single time step + perform_timestep(state, dt, direction_switch, fixed_data); +// Inform the user +#ifndef NO_INFORM + if (mainproc) { + printf("Elapsed Time: %lf / %lf\n", etime, sim_time); + } +#endif + // Update the elapsed time and output counter etime = etime + dt; output_counter = output_counter + dt; - //If it's time for output, reset the counter, and do output + // If it's time for output, reset the counter, and do output if (output_freq >= 0 && output_counter >= output_freq) { output_counter = output_counter - output_freq; - output(state,etime,num_out,fixed_data); + output(state, etime, num_out, fixed_data); } } Kokkos::fence(); auto t2 = std::chrono::steady_clock::now(); if (mainproc) { - std::cout << "CPU Time: " << std::chrono::duration(t2-t1).count() << " sec\n"; + std::cout << "CPU Time: " + << std::chrono::duration(t2 - t1).count() << " sec\n"; } - //Final reductions for mass, kinetic energy, and total energy + // Final reductions for mass, kinetic energy, and total energy double mass, te; - reductions(state,mass,te,fixed_data); + reductions(state, mass, te, fixed_data); if (mainproc) { - printf( "d_mass: %le\n" , (mass - 
mass0)/mass0 ); - printf( "d_te: %le\n" , (te - te0 )/te0 ); + printf("d_mass: %le\n", (mass - mass0) / mass0); + printf("d_te: %le\n", (te - te0) / te0); } finalize(); @@ -156,225 +187,250 @@ int main(int argc, char **argv) { MPI_Finalize(); } +// Performs a single dimensionally split time step using a simple low-storage +// three-stage Runge-Kutta time integrator The dimensional splitting is a +// second-order-accurate alternating Strang splitting in which the order of +// directions is alternated each time step. The Runge-Kutta method used here is +// defined as follows: +// q* = q_n + dt/3 * rhs(q_n) +// q** = q_n + dt/2 * rhs(q* ) +// q_n+1 = q_n + dt/1 * rhs(q**) +void perform_timestep(real3d const &state, real dt, int &direction_switch, + Fixed_data const &fixed_data) { + auto &nx = fixed_data.nx; + auto &nz = fixed_data.nz; + + real3d state_tmp("state_tmp", NUM_VARS, nz + 2 * hs, nx + 2 * hs); -//Performs a single dimensionally split time step using a simple low-storage three-stage Runge-Kutta time integrator -//The dimensional splitting is a second-order-accurate alternating Strang splitting in which the -//order of directions is alternated each time step. -//The Runge-Kutta method used here is defined as follows: -// q* = q_n + dt/3 * rhs(q_n) -// q** = q_n + dt/2 * rhs(q* ) -// q_n+1 = q_n + dt/1 * rhs(q**) -void perform_timestep( real3d const &state , real dt , int &direction_switch , Fixed_data const &fixed_data ) { - auto &nx = fixed_data.nx ; - auto &nz = fixed_data.nz ; - - real3d state_tmp("state_tmp",NUM_VARS,nz+2*hs,nx+2*hs); - if (direction_switch) { - //x-direction first - semi_discrete_step( state , state , state_tmp , dt / 3 , DIR_X , fixed_data ); - semi_discrete_step( state , state_tmp , state_tmp , dt / 2 , DIR_X , fixed_data ); - semi_discrete_step( state , state_tmp , state , dt / 1 , DIR_X , fixed_data ); - //z-direction second - semi_discrete_step( state , state , state_tmp , dt / 3 , DIR_Z , fixed_data ); - semi_discrete_step( state , state_tmp , state_tmp , dt / 2 , DIR_Z , fixed_data ); - semi_discrete_step( state , state_tmp , state , dt / 1 , DIR_Z , fixed_data ); + // x-direction first + semi_discrete_step(state, state, state_tmp, dt / 3, DIR_X, fixed_data); + semi_discrete_step(state, state_tmp, state_tmp, dt / 2, DIR_X, fixed_data); + semi_discrete_step(state, state_tmp, state, dt / 1, DIR_X, fixed_data); + // z-direction second + semi_discrete_step(state, state, state_tmp, dt / 3, DIR_Z, fixed_data); + semi_discrete_step(state, state_tmp, state_tmp, dt / 2, DIR_Z, fixed_data); + semi_discrete_step(state, state_tmp, state, dt / 1, DIR_Z, fixed_data); + } else { + // z-direction second + semi_discrete_step(state, state, state_tmp, dt / 3, DIR_Z, fixed_data); + semi_discrete_step(state, state_tmp, state_tmp, dt / 2, DIR_Z, fixed_data); + semi_discrete_step(state, state_tmp, state, dt / 1, DIR_Z, fixed_data); + // x-direction first + semi_discrete_step(state, state, state_tmp, dt / 3, DIR_X, fixed_data); + semi_discrete_step(state, state_tmp, state_tmp, dt / 2, DIR_X, fixed_data); + semi_discrete_step(state, state_tmp, state, dt / 1, DIR_X, fixed_data); + } + if (direction_switch) { + direction_switch = 0; } else { - //z-direction second - semi_discrete_step( state , state , state_tmp , dt / 3 , DIR_Z , fixed_data ); - semi_discrete_step( state , state_tmp , state_tmp , dt / 2 , DIR_Z , fixed_data ); - semi_discrete_step( state , state_tmp , state , dt / 1 , DIR_Z , fixed_data ); - //x-direction first - semi_discrete_step( state , state , state_tmp , dt / 
3 , DIR_X , fixed_data ); - semi_discrete_step( state , state_tmp , state_tmp , dt / 2 , DIR_X , fixed_data ); - semi_discrete_step( state , state_tmp , state , dt / 1 , DIR_X , fixed_data ); + direction_switch = 1; } - if (direction_switch) { direction_switch = 0; } else { direction_switch = 1; } } - -//Perform a single semi-discretized step in time with the form: -//state_out = state_init + dt * rhs(state_forcing) -//Meaning the step starts from state_init, computes the rhs using state_forcing, and stores the result in state_out -void semi_discrete_step( realConst3d state_init , real3d const &state_forcing , real3d const &state_out , real dt , int dir , Fixed_data const &fixed_data ) { - auto &nx = fixed_data.nx ; - auto &nz = fixed_data.nz ; - auto &i_beg = fixed_data.i_beg ; - auto &k_beg = fixed_data.k_beg ; - auto &hy_dens_cell = fixed_data.hy_dens_cell ; - - real3d tend("tend",NUM_VARS,nz,nx); - - if (dir == DIR_X) { - //Set the halo values for this MPI task's fluid state in the x-direction +// Perform a single semi-discretized step in time with the form: +// state_out = state_init + dt * rhs(state_forcing) +// Meaning the step starts from state_init, computes the rhs using +// state_forcing, and stores the result in state_out +void semi_discrete_step(realConst3d state_init, real3d const &state_forcing, + real3d const &state_out, real dt, int dir, + Fixed_data const &fixed_data) { + auto &nx = fixed_data.nx; + auto &nz = fixed_data.nz; + auto &i_beg = fixed_data.i_beg; + auto &k_beg = fixed_data.k_beg; + auto &hy_dens_cell = fixed_data.hy_dens_cell; + + real3d tend("tend", NUM_VARS, nz, nx); + + if (dir == DIR_X) { + // Set the halo values for this MPI task's fluid state in the x-direction yakl::timer_start("halo x"); - set_halo_values_x(state_forcing,fixed_data); + set_halo_values_x(state_forcing, fixed_data); yakl::timer_stop("halo x"); - //Compute the time tendencies for the fluid state in the x-direction + // Compute the time tendencies for the fluid state in the x-direction yakl::timer_start("tendencies x"); - compute_tendencies_x(state_forcing,tend,dt,fixed_data); + compute_tendencies_x(state_forcing, tend, dt, fixed_data); yakl::timer_stop("tendencies x"); } else if (dir == DIR_Z) { - //Set the halo values for this MPI task's fluid state in the z-direction + // Set the halo values for this MPI task's fluid state in the z-direction yakl::timer_start("halo z"); - set_halo_values_z(state_forcing,fixed_data); + set_halo_values_z(state_forcing, fixed_data); yakl::timer_stop("halo z"); - //Compute the time tendencies for the fluid state in the z-direction + // Compute the time tendencies for the fluid state in the z-direction yakl::timer_start("tendencies z"); - compute_tendencies_z(state_forcing,tend,dt,fixed_data); + compute_tendencies_z(state_forcing, tend, dt, fixed_data); yakl::timer_stop("tendencies z"); } - //Apply the tendencies to the fluid state - // for (ll=0; ll(NUM_VARS,nz,nx) , KOKKOS_LAMBDA ( int ll, int k, int i ) { - if (data_spec_int == DATA_SPEC_GRAVITY_WAVES) { - real x = (i_beg + i+0.5)*dx; - real z = (k_beg + k+0.5)*dz; - real wpert = sample_ellipse_cosine( x,z , 0.01 , xlen/8. ,1000. , 500. ,500. 
); - tend(ID_WMOM,k,i) += wpert*hy_dens_cell(hs+k); - } - state_out(ll,hs+k,hs+i) = state_init(ll,hs+k,hs+i) + dt * tend(ll,k,i); - }); + parallel_for( + SimpleBounds<3>(NUM_VARS, nz, nx), KOKKOS_LAMBDA(int ll, int k, int i) { + if (data_spec_int == DATA_SPEC_GRAVITY_WAVES) { + real x = (i_beg + i + 0.5) * dx; + real z = (k_beg + k + 0.5) * dz; + real wpert = + sample_ellipse_cosine(x, z, 0.01, xlen / 8., 1000., 500., 500.); + tend(ID_WMOM, k, i) += wpert * hy_dens_cell(hs + k); + } + state_out(ll, hs + k, hs + i) = + state_init(ll, hs + k, hs + i) + dt * tend(ll, k, i); + }); yakl::timer_stop("apply tendencies"); } - -//Compute the time tendencies of the fluid state using forcing in the x-direction -//Since the halos are set in a separate routine, this will not require MPI -//First, compute the flux vector at each cell interface in the x-direction (including hyperviscosity) -//Then, compute the tendencies using those fluxes -void compute_tendencies_x( realConst3d state , real3d const &tend , real dt , Fixed_data const &fixed_data ) { - auto &nx = fixed_data.nx ; - auto &nz = fixed_data.nz ; - auto &hy_dens_cell = fixed_data.hy_dens_cell ; +// Compute the time tendencies of the fluid state using forcing in the +// x-direction Since the halos are set in a separate routine, this will not +// require MPI First, compute the flux vector at each cell interface in the +// x-direction (including hyperviscosity) Then, compute the tendencies using +// those fluxes +void compute_tendencies_x(realConst3d state, real3d const &tend, real dt, + Fixed_data const &fixed_data) { + auto &nx = fixed_data.nx; + auto &nz = fixed_data.nz; + auto &hy_dens_cell = fixed_data.hy_dens_cell; auto &hy_dens_theta_cell = fixed_data.hy_dens_theta_cell; - real3d flux("flux",NUM_VARS,nz,nx+1); + real3d flux("flux", NUM_VARS, nz, nx + 1); + + // Compute the hyperviscosity coefficient + real hv_coef = -hv_beta * dx / (16 * dt); + // Compute fluxes in the x-direction for each cell + // for (k=0; k(nz, nx + 1), KOKKOS_LAMBDA(int k, int i) { + SArray stencil; + SArray d3_vals; + SArray vals; + + // Use fourth-order interpolation from four cell averages to compute the + // value at the interface in question + for (int ll = 0; ll < NUM_VARS; ll++) { + for (int s = 0; s < sten_size; s++) { + stencil(s) = state(ll, hs + k, i + s); + } + // Fourth-order-accurate interpolation of the state + vals(ll) = -stencil(0) / 12 + 7 * stencil(1) / 12 + + 7 * stencil(2) / 12 - stencil(3) / 12; + // First-order-accurate interpolation of the third spatial derivative + // of the state (for artificial viscosity) + d3_vals(ll) = + -stencil(0) + 3 * stencil(1) - 3 * stencil(2) + stencil(3); + } - //Compute the hyperviscosity coefficient - real hv_coef = -hv_beta * dx / (16*dt); - //Compute fluxes in the x-direction for each cell - // for (k=0; k(nz,nx+1) , KOKKOS_LAMBDA (int k, int i ) { - SArray stencil; - SArray d3_vals; - SArray vals; - - //Use fourth-order interpolation from four cell averages to compute the value at the interface in question - for (int ll=0; ll(NUM_VARS,nz,nx) , KOKKOS_LAMBDA ( int ll, int k, int i ) { - tend(ll,k,i) = -( flux(ll,k,i+1) - flux(ll,k,i) ) / dx; - }); + // Use the fluxes to compute tendencies for each cell + // for (ll=0; ll(NUM_VARS, nz, nx), KOKKOS_LAMBDA(int ll, int k, int i) { + tend(ll, k, i) = -(flux(ll, k, i + 1) - flux(ll, k, i)) / dx; + }); } +// Compute the time tendencies of the fluid state using forcing in the +// z-direction Since the halos are set in a separate routine, this will not +// require MPI 
First, compute the flux vector at each cell interface in the +// z-direction (including hyperviscosity) Then, compute the tendencies using +// those fluxes +void compute_tendencies_z(realConst3d state, real3d const &tend, real dt, + Fixed_data const &fixed_data) { + auto &nx = fixed_data.nx; + auto &nz = fixed_data.nz; + auto &hy_dens_int = fixed_data.hy_dens_int; + auto &hy_dens_theta_int = fixed_data.hy_dens_theta_int; + auto &hy_pressure_int = fixed_data.hy_pressure_int; + + real3d flux("flux", NUM_VARS, nz + 1, nx); + + // Compute the hyperviscosity coefficient + real hv_coef = -hv_beta * dz / (16 * dt); + // Compute fluxes in the x-direction for each cell + // for (k=0; k(nz + 1, nx), KOKKOS_LAMBDA(int k, int i) { + SArray stencil; + SArray d3_vals; + SArray vals; + + // Use fourth-order interpolation from four cell averages to compute the + // value at the interface in question + for (int ll = 0; ll < NUM_VARS; ll++) { + for (int s = 0; s < sten_size; s++) { + stencil(s) = state(ll, k + s, hs + i); + } + // Fourth-order-accurate interpolation of the state + vals(ll) = -stencil(0) / 12 + 7 * stencil(1) / 12 + + 7 * stencil(2) / 12 - stencil(3) / 12; + // First-order-accurate interpolation of the third spatial derivative + // of the state + d3_vals(ll) = + -stencil(0) + 3 * stencil(1) - 3 * stencil(2) + stencil(3); + } -//Compute the time tendencies of the fluid state using forcing in the z-direction -//Since the halos are set in a separate routine, this will not require MPI -//First, compute the flux vector at each cell interface in the z-direction (including hyperviscosity) -//Then, compute the tendencies using those fluxes -void compute_tendencies_z( realConst3d state , real3d const &tend , real dt , Fixed_data const &fixed_data ) { - auto &nx = fixed_data.nx ; - auto &nz = fixed_data.nz ; - auto &hy_dens_int = fixed_data.hy_dens_int ; - auto &hy_dens_theta_int = fixed_data.hy_dens_theta_int ; - auto &hy_pressure_int = fixed_data.hy_pressure_int ; - - real3d flux("flux",NUM_VARS,nz+1,nx); - - //Compute the hyperviscosity coefficient - real hv_coef = -hv_beta * dz / (16*dt); - //Compute fluxes in the x-direction for each cell - // for (k=0; k(nz+1,nx) , KOKKOS_LAMBDA (int k, int i) { - SArray stencil; - SArray d3_vals; - SArray vals; - - //Use fourth-order interpolation from four cell averages to compute the value at the interface in question - for (int ll=0; ll(NUM_VARS,nz,nx) , KOKKOS_LAMBDA ( int ll, int k, int i ) { - tend(ll,k,i) = -( flux(ll,k+1,i) - flux(ll,k,i) ) / dz; - if (ll == ID_WMOM) { - tend(ll,k,i) -= state(ID_DENS,hs+k,hs+i)*grav; - } - }); + // Use the fluxes to compute tendencies for each cell + // for (ll=0; ll(NUM_VARS, nz, nx), KOKKOS_LAMBDA(int ll, int k, int i) { + tend(ll, k, i) = -(flux(ll, k + 1, i) - flux(ll, k, i)) / dz; + if (ll == ID_WMOM) { + tend(ll, k, i) -= state(ID_DENS, hs + k, hs + i) * grav; + } + }); } - - -//Set this MPI task's halo values in the x-direction. This routine will require MPI -void set_halo_values_x( real3d const &state , Fixed_data const &fixed_data ) { - auto &nx = fixed_data.nx ; - auto &nz = fixed_data.nz ; - auto &k_beg = fixed_data.k_beg ; - auto &left_rank = fixed_data.left_rank ; - auto &right_rank = fixed_data.right_rank ; - auto &myrank = fixed_data.myrank ; - auto &hy_dens_cell = fixed_data.hy_dens_cell ; +// Set this MPI task's halo values in the x-direction. 
This routine will require +// MPI +void set_halo_values_x(real3d const &state, Fixed_data const &fixed_data) { + auto &nx = fixed_data.nx; + auto &nz = fixed_data.nz; + auto &k_beg = fixed_data.k_beg; + auto &left_rank = fixed_data.left_rank; + auto &right_rank = fixed_data.right_rank; + auto &myrank = fixed_data.myrank; + auto &hy_dens_cell = fixed_data.hy_dens_cell; auto &hy_dens_theta_cell = fixed_data.hy_dens_theta_cell; int ierr; @@ -382,180 +438,214 @@ void set_halo_values_x( real3d const &state , Fixed_data const &fixed_data ) { if (fixed_data.nranks == 1) { - parallel_for( SimpleBounds<2>(NUM_VARS,nz) , KOKKOS_LAMBDA (int ll, int k) { - state(ll,hs+k,0 ) = state(ll,hs+k,nx+hs-2); - state(ll,hs+k,1 ) = state(ll,hs+k,nx+hs-1); - state(ll,hs+k,nx+hs ) = state(ll,hs+k,hs ); - state(ll,hs+k,nx+hs+1) = state(ll,hs+k,hs+1 ); - }); + parallel_for( + SimpleBounds<2>(NUM_VARS, nz), KOKKOS_LAMBDA(int ll, int k) { + state(ll, hs + k, 0) = state(ll, hs + k, nx + hs - 2); + state(ll, hs + k, 1) = state(ll, hs + k, nx + hs - 1); + state(ll, hs + k, nx + hs) = state(ll, hs + k, hs); + state(ll, hs + k, nx + hs + 1) = state(ll, hs + k, hs + 1); + }); } else { - real3d sendbuf_l ( "sendbuf_l" , NUM_VARS,nz,hs ); //Buffer to send data to the left MPI rank - real3d sendbuf_r ( "sendbuf_r" , NUM_VARS,nz,hs ); //Buffer to send data to the right MPI rank - real3d recvbuf_l ( "recvbuf_l" , NUM_VARS,nz,hs ); //Buffer to receive data from the left MPI rank - real3d recvbuf_r ( "recvbuf_r" , NUM_VARS,nz,hs ); //Buffer to receive data from the right MPI rank - #ifndef GPU_AWARE_MPI - real3dHost sendbuf_l_cpu( "sendbuf_l" , NUM_VARS,nz,hs ); //Buffer to send data to the left MPI rank (CPU copy) - real3dHost sendbuf_r_cpu( "sendbuf_r" , NUM_VARS,nz,hs ); //Buffer to send data to the right MPI rank (CPU copy) - real3dHost recvbuf_l_cpu( "recvbuf_l" , NUM_VARS,nz,hs ); //Buffer to receive data from the left MPI rank (CPU copy) - real3dHost recvbuf_r_cpu( "recvbuf_r" , NUM_VARS,nz,hs ); //Buffer to receive data from the right MPI rank (CPU copy) - #endif - - //Prepost receives - #ifdef GPU_AWARE_MPI - Kokkos::fence(); - ierr = MPI_Irecv(recvbuf_l.data(),hs*nz*NUM_VARS,mpi_type, left_rank,0,MPI_COMM_WORLD,&req_r[0]); - ierr = MPI_Irecv(recvbuf_r.data(),hs*nz*NUM_VARS,mpi_type,right_rank,1,MPI_COMM_WORLD,&req_r[1]); - #else - ierr = MPI_Irecv(recvbuf_l_cpu.data(),hs*nz*NUM_VARS,mpi_type, left_rank,0,MPI_COMM_WORLD,&req_r[0]); - ierr = MPI_Irecv(recvbuf_r_cpu.data(),hs*nz*NUM_VARS,mpi_type,right_rank,1,MPI_COMM_WORLD,&req_r[1]); - #endif - - //Pack the send buffers - // for (ll=0; ll(NUM_VARS,nz,hs) , KOKKOS_LAMBDA (int ll, int k, int s) { - sendbuf_l(ll,k,s) = state(ll,k+hs,hs+s); - sendbuf_r(ll,k,s) = state(ll,k+hs,nx+s); - }); + real3d sendbuf_l("sendbuf_l", NUM_VARS, nz, + hs); // Buffer to send data to the left MPI rank + real3d sendbuf_r("sendbuf_r", NUM_VARS, nz, + hs); // Buffer to send data to the right MPI rank + real3d recvbuf_l("recvbuf_l", NUM_VARS, nz, + hs); // Buffer to receive data from the left MPI rank + real3d recvbuf_r("recvbuf_r", NUM_VARS, nz, + hs); // Buffer to receive data from the right MPI rank +#ifndef GPU_AWARE_MPI + real3dHost sendbuf_l_cpu( + "sendbuf_l", NUM_VARS, nz, + hs); // Buffer to send data to the left MPI rank (CPU copy) + real3dHost sendbuf_r_cpu( + "sendbuf_r", NUM_VARS, nz, + hs); // Buffer to send data to the right MPI rank (CPU copy) + real3dHost recvbuf_l_cpu( + "recvbuf_l", NUM_VARS, nz, + hs); // Buffer to receive data from the left MPI rank (CPU copy) + real3dHost 
recvbuf_r_cpu( + "recvbuf_r", NUM_VARS, nz, + hs); // Buffer to receive data from the right MPI rank (CPU copy) +#endif + +// Prepost receives +#ifdef GPU_AWARE_MPI Kokkos::fence(); - - #ifndef GPU_AWARE_MPI - // This will copy from GPU to host - sendbuf_l.deep_copy_to(sendbuf_l_cpu); - sendbuf_r.deep_copy_to(sendbuf_r_cpu); - Kokkos::fence(); - #endif - - //Fire off the sends - #ifdef GPU_AWARE_MPI - ierr = MPI_Isend(sendbuf_l.data(),hs*nz*NUM_VARS,mpi_type, left_rank,1,MPI_COMM_WORLD,&req_s[0]); - ierr = MPI_Isend(sendbuf_r.data(),hs*nz*NUM_VARS,mpi_type,right_rank,0,MPI_COMM_WORLD,&req_s[1]); - #else - ierr = MPI_Isend(sendbuf_l_cpu.data(),hs*nz*NUM_VARS,mpi_type, left_rank,1,MPI_COMM_WORLD,&req_s[0]); - ierr = MPI_Isend(sendbuf_r_cpu.data(),hs*nz*NUM_VARS,mpi_type,right_rank,0,MPI_COMM_WORLD,&req_s[1]); - #endif - - //Wait for receives to finish - ierr = MPI_Waitall(2,req_r,MPI_STATUSES_IGNORE); - - #ifndef GPU_AWARE_MPI - // This will copy from host to GPU - recvbuf_l_cpu.deep_copy_to(recvbuf_l); - recvbuf_r_cpu.deep_copy_to(recvbuf_r); - Kokkos::fence(); - #endif - - //Unpack the receive buffers - // for (ll=0; ll(NUM_VARS,nz,hs) , KOKKOS_LAMBDA (int ll, int k, int s) { - state(ll,k+hs,s ) = recvbuf_l(ll,k,s); - state(ll,k+hs,nx+hs+s) = recvbuf_r(ll,k,s); - }); + ierr = MPI_Irecv(recvbuf_l.data(), hs * nz * NUM_VARS, mpi_type, left_rank, + 0, MPI_COMM_WORLD, &req_r[0]); + ierr = MPI_Irecv(recvbuf_r.data(), hs * nz * NUM_VARS, mpi_type, right_rank, + 1, MPI_COMM_WORLD, &req_r[1]); +#else + ierr = MPI_Irecv(recvbuf_l_cpu.data(), hs * nz * NUM_VARS, mpi_type, + left_rank, 0, MPI_COMM_WORLD, &req_r[0]); + ierr = MPI_Irecv(recvbuf_r_cpu.data(), hs * nz * NUM_VARS, mpi_type, + right_rank, 1, MPI_COMM_WORLD, &req_r[1]); +#endif + + // Pack the send buffers + // for (ll=0; ll(NUM_VARS, nz, hs), KOKKOS_LAMBDA(int ll, int k, int s) { + sendbuf_l(ll, k, s) = state(ll, k + hs, hs + s); + sendbuf_r(ll, k, s) = state(ll, k + hs, nx + s); + }); Kokkos::fence(); - //Wait for sends to finish - ierr = MPI_Waitall(2,req_s,MPI_STATUSES_IGNORE); +#ifndef GPU_AWARE_MPI + // This will copy from GPU to host + sendbuf_l.deep_copy_to(sendbuf_l_cpu); + sendbuf_r.deep_copy_to(sendbuf_r_cpu); + Kokkos::fence(); +#endif + +// Fire off the sends +#ifdef GPU_AWARE_MPI + ierr = MPI_Isend(sendbuf_l.data(), hs * nz * NUM_VARS, mpi_type, left_rank, + 1, MPI_COMM_WORLD, &req_s[0]); + ierr = MPI_Isend(sendbuf_r.data(), hs * nz * NUM_VARS, mpi_type, right_rank, + 0, MPI_COMM_WORLD, &req_s[1]); +#else + ierr = MPI_Isend(sendbuf_l_cpu.data(), hs * nz * NUM_VARS, mpi_type, + left_rank, 1, MPI_COMM_WORLD, &req_s[0]); + ierr = MPI_Isend(sendbuf_r_cpu.data(), hs * nz * NUM_VARS, mpi_type, + right_rank, 0, MPI_COMM_WORLD, &req_s[1]); +#endif + + // Wait for receives to finish + ierr = MPI_Waitall(2, req_r, MPI_STATUSES_IGNORE); + +#ifndef GPU_AWARE_MPI + // This will copy from host to GPU + recvbuf_l_cpu.deep_copy_to(recvbuf_l); + recvbuf_r_cpu.deep_copy_to(recvbuf_r); + Kokkos::fence(); +#endif + + // Unpack the receive buffers + // for (ll=0; ll(NUM_VARS, nz, hs), KOKKOS_LAMBDA(int ll, int k, int s) { + state(ll, k + hs, s) = recvbuf_l(ll, k, s); + state(ll, k + hs, nx + hs + s) = recvbuf_r(ll, k, s); + }); + Kokkos::fence(); + // Wait for sends to finish + ierr = MPI_Waitall(2, req_s, MPI_STATUSES_IGNORE); } if (data_spec_int == DATA_SPEC_INJECTION) { if (myrank == 0) { // for (k=0; k(nz,hs) , KOKKOS_LAMBDA (int k, int i) { - double z = (k_beg + k+0.5)*dz; - if (abs(z-3*zlen/4) <= zlen/16) { - state(ID_UMOM,hs+k,i) = 
(state(ID_DENS,hs+k,i)+hy_dens_cell(hs+k)) * 50; - state(ID_RHOT,hs+k,i) = (state(ID_DENS,hs+k,i)+hy_dens_cell(hs+k)) * 298 - hy_dens_theta_cell(hs+k); - } - }); + parallel_for( + SimpleBounds<2>(nz, hs), KOKKOS_LAMBDA(int k, int i) { + double z = (k_beg + k + 0.5) * dz; + if (abs(z - 3 * zlen / 4) <= zlen / 16) { + state(ID_UMOM, hs + k, i) = + (state(ID_DENS, hs + k, i) + hy_dens_cell(hs + k)) * 50; + state(ID_RHOT, hs + k, i) = + (state(ID_DENS, hs + k, i) + hy_dens_cell(hs + k)) * 298 - + hy_dens_theta_cell(hs + k); + } + }); } } } +// Set this MPI task's halo values in the z-direction. This does not require MPI +// because there is no MPI decomposition in the vertical direction +void set_halo_values_z(real3d const &state, Fixed_data const &fixed_data) { + auto &nx = fixed_data.nx; + auto &nz = fixed_data.nz; + auto &hy_dens_cell = fixed_data.hy_dens_cell; -//Set this MPI task's halo values in the z-direction. This does not require MPI because there is no MPI -//decomposition in the vertical direction -void set_halo_values_z( real3d const &state , Fixed_data const &fixed_data ) { - auto &nx = fixed_data.nx ; - auto &nz = fixed_data.nz ; - auto &hy_dens_cell = fixed_data.hy_dens_cell ; - // for (ll=0; ll(NUM_VARS,nx+2*hs) , KOKKOS_LAMBDA (int ll, int i) { - if (ll == ID_WMOM) { - state(ll,0 ,i) = 0.; - state(ll,1 ,i) = 0.; - state(ll,nz+hs ,i) = 0.; - state(ll,nz+hs+1,i) = 0.; - } else if (ll == ID_UMOM) { - state(ll,0 ,i) = state(ll,hs ,i) / hy_dens_cell(hs ) * hy_dens_cell(0 ); - state(ll,1 ,i) = state(ll,hs ,i) / hy_dens_cell(hs ) * hy_dens_cell(1 ); - state(ll,nz+hs ,i) = state(ll,nz+hs-1,i) / hy_dens_cell(nz+hs-1) * hy_dens_cell(nz+hs ); - state(ll,nz+hs+1,i) = state(ll,nz+hs-1,i) / hy_dens_cell(nz+hs-1) * hy_dens_cell(nz+hs+1); - } else { - state(ll,0 ,i) = state(ll,hs ,i); - state(ll,1 ,i) = state(ll,hs ,i); - state(ll,nz+hs ,i) = state(ll,nz+hs-1,i); - state(ll,nz+hs+1,i) = state(ll,nz+hs-1,i); - } - }); + parallel_for( + SimpleBounds<2>(NUM_VARS, nx + 2 * hs), KOKKOS_LAMBDA(int ll, int i) { + if (ll == ID_WMOM) { + state(ll, 0, i) = 0.; + state(ll, 1, i) = 0.; + state(ll, nz + hs, i) = 0.; + state(ll, nz + hs + 1, i) = 0.; + } else if (ll == ID_UMOM) { + state(ll, 0, i) = + state(ll, hs, i) / hy_dens_cell(hs) * hy_dens_cell(0); + state(ll, 1, i) = + state(ll, hs, i) / hy_dens_cell(hs) * hy_dens_cell(1); + state(ll, nz + hs, i) = state(ll, nz + hs - 1, i) / + hy_dens_cell(nz + hs - 1) * + hy_dens_cell(nz + hs); + state(ll, nz + hs + 1, i) = state(ll, nz + hs - 1, i) / + hy_dens_cell(nz + hs - 1) * + hy_dens_cell(nz + hs + 1); + } else { + state(ll, 0, i) = state(ll, hs, i); + state(ll, 1, i) = state(ll, hs, i); + state(ll, nz + hs, i) = state(ll, nz + hs - 1, i); + state(ll, nz + hs + 1, i) = state(ll, nz + hs - 1, i); + } + }); } +void init(real3d &state, real &dt, Fixed_data &fixed_data) { + auto &nx = fixed_data.nx; + auto &nz = fixed_data.nz; + auto &i_beg = fixed_data.i_beg; + auto &k_beg = fixed_data.k_beg; + auto &left_rank = fixed_data.left_rank; + auto &right_rank = fixed_data.right_rank; + auto &nranks = fixed_data.nranks; + auto &myrank = fixed_data.myrank; + auto &mainproc = fixed_data.mainproc; + int ierr; -void init( real3d &state , real &dt , Fixed_data &fixed_data ) { - auto &nx = fixed_data.nx ; - auto &nz = fixed_data.nz ; - auto &i_beg = fixed_data.i_beg ; - auto &k_beg = fixed_data.k_beg ; - auto &left_rank = fixed_data.left_rank ; - auto &right_rank = fixed_data.right_rank ; - auto &nranks = fixed_data.nranks ; - auto &myrank = fixed_data.myrank ; - auto 
&mainproc = fixed_data.mainproc ; - int ierr; - - ierr = MPI_Comm_size(MPI_COMM_WORLD,&nranks); - ierr = MPI_Comm_rank(MPI_COMM_WORLD,&myrank); - real nper = ( (double) nx_glob ) / nranks; - i_beg = round( nper* (myrank) ); - int i_end = round( nper*((myrank)+1) )-1; + ierr = MPI_Comm_size(MPI_COMM_WORLD, &nranks); + ierr = MPI_Comm_rank(MPI_COMM_WORLD, &myrank); + real nper = ((double)nx_glob) / nranks; + i_beg = round(nper * (myrank)); + int i_end = round(nper * ((myrank) + 1)) - 1; nx = i_end - i_beg + 1; - left_rank = myrank - 1; - if (left_rank == -1) left_rank = nranks-1; + left_rank = myrank - 1; + if (left_rank == -1) + left_rank = nranks - 1; right_rank = myrank + 1; - if (right_rank == nranks) right_rank = 0; + if (right_rank == nranks) + right_rank = 0; - //Vertical direction isn't MPI-ized, so the rank's local values = the global values + // Vertical direction isn't MPI-ized, so the rank's local values = the global + // values k_beg = 0; nz = nz_glob; mainproc = (myrank == 0); - //Allocate the model data - state = real3d( "state" , NUM_VARS,nz+2*hs,nx+2*hs); + // Allocate the model data + state = real3d("state", NUM_VARS, nz + 2 * hs, nx + 2 * hs); - //Define the maximum stable time step based on an assumed maximum wind speed - dt = min(dx,dz) / max_speed * cfl; + // Define the maximum stable time step based on an assumed maximum wind speed + dt = min(dx, dz) / max_speed * cfl; - //If I'm the main process in MPI, display some grid information + // If I'm the main process in MPI, display some grid information if (mainproc) { - printf( "nx_glob, nz_glob: %d %d\n", nx_glob, nz_glob); - printf( "dx,dz: %lf %lf\n",dx,dz); - printf( "dt: %lf\n",dt); + printf("nx_glob, nz_glob: %d %d\n", nx_glob, nz_glob); + printf("dx,dz: %lf %lf\n", dx, dz); + printf("dt: %lf\n", dt); } - //Want to make sure this info is displayed before further output + // Want to make sure this info is displayed before further output ierr = MPI_Barrier(MPI_COMM_WORLD); // Define quadrature weights and points const int nqpoints = 3; - SArray qpoints; - SArray qweights; + SArray qpoints; + SArray qweights; qpoints(0) = 0.112701665379258311482073460022; qpoints(1) = 0.500000000000000000000000000000; @@ -570,333 +660,412 @@ void init( real3d &state , real &dt , Fixed_data &fixed_data ) { ////////////////////////////////////////////////////////////////////////// // for (k=0; k(nz+2*hs,nx+2*hs) , KOKKOS_LAMBDA (int k, int i) { - //Initialize the state to zero - for (int ll=0; ll(nz + 2 * hs, nx + 2 * hs), KOKKOS_LAMBDA(int k, int i) { + // Initialize the state to zero + for (int ll = 0; ll < NUM_VARS; ll++) { + state(ll, k, i) = 0.; + } + // Use Gauss-Legendre quadrature to initialize a hydrostatic balance + + // temperature perturbation + for (int kk = 0; kk < nqpoints; kk++) { + for (int ii = 0; ii < nqpoints; ii++) { + // Compute the x,z location within the global domain based on cell + // and quadrature index + real x = (i_beg + i - hs + 0.5) * dx + (qpoints(ii) - 0.5) * dx; + real z = (k_beg + k - hs + 0.5) * dz + (qpoints(kk) - 0.5) * dz; + real r, u, w, t, hr, ht; + + // Set the fluid state based on the user's specification + if (data_spec_int == DATA_SPEC_COLLISION) { + collision(x, z, r, u, w, t, hr, ht); + } + if (data_spec_int == DATA_SPEC_THERMAL) { + thermal(x, z, r, u, w, t, hr, ht); + } + if (data_spec_int == DATA_SPEC_GRAVITY_WAVES) { + gravity_waves(x, z, r, u, w, t, hr, ht); + } + if (data_spec_int == DATA_SPEC_DENSITY_CURRENT) { + density_current(x, z, r, u, w, t, hr, ht); + } + if (data_spec_int == 
DATA_SPEC_INJECTION) { + injection(x, z, r, u, w, t, hr, ht); + } + + // Store into the fluid state array + state(ID_DENS, k, i) += r * qweights(ii) * qweights(kk); + state(ID_UMOM, k, i) += (r + hr) * u * qweights(ii) * qweights(kk); + state(ID_WMOM, k, i) += (r + hr) * w * qweights(ii) * qweights(kk); + state(ID_RHOT, k, i) += + ((r + hr) * (t + ht) - hr * ht) * qweights(ii) * qweights(kk); + } + } + }); + + real1d hy_dens_cell("hy_dens_cell ", nz + 2 * hs); + real1d hy_dens_theta_cell("hy_dens_theta_cell", nz + 2 * hs); + real1d hy_dens_int("hy_dens_int ", nz + 1); + real1d hy_dens_theta_int("hy_dens_theta_int ", nz + 1); + real1d hy_pressure_int("hy_pressure_int ", nz + 1); + + // Compute the hydrostatic background state over vertical cell averages + // for (int k=0; k(nz,nx) , KOKKOS_LAMBDA (int k, int i) { - dens (k,i) = state(ID_DENS,hs+k,hs+i); - uwnd (k,i) = state(ID_UMOM,hs+k,hs+i) / ( hy_dens_cell(hs+k) + state(ID_DENS,hs+k,hs+i) ); - wwnd (k,i) = state(ID_WMOM,hs+k,hs+i) / ( hy_dens_cell(hs+k) + state(ID_DENS,hs+k,hs+i) ); - theta(k,i) = ( state(ID_RHOT,hs+k,hs+i) + hy_dens_theta_cell(hs+k) ) / ( hy_dens_cell(hs+k) + state(ID_DENS,hs+k,hs+i) ) - hy_dens_theta_cell(hs+k) / hy_dens_cell(hs+k); - }); + // Store perturbed values in the temp arrays for output + // for (k=0; k(nz, nx), KOKKOS_LAMBDA(int k, int i) { + dens(k, i) = state(ID_DENS, hs + k, hs + i); + uwnd(k, i) = state(ID_UMOM, hs + k, hs + i) / + (hy_dens_cell(hs + k) + state(ID_DENS, hs + k, hs + i)); + wwnd(k, i) = state(ID_WMOM, hs + k, hs + i) / + (hy_dens_cell(hs + k) + state(ID_DENS, hs + k, hs + i)); + theta(k, i) = + (state(ID_RHOT, hs + k, hs + i) + hy_dens_theta_cell(hs + k)) / + (hy_dens_cell(hs + k) + state(ID_DENS, hs + k, hs + i)) - + hy_dens_theta_cell(hs + k) / hy_dens_cell(hs + k); + }); Kokkos::fence(); - //Write the grid data to file with all the processes writing collectively - st3[0] = num_out; st3[1] = k_beg; st3[2] = i_beg; - ct3[0] = 1 ; ct3[1] = nz ; ct3[2] = nx ; - ncwrap( ncmpi_put_vara_double_all( ncid , dens_varid , st3 , ct3 , dens .createHostCopy().data() ) , __LINE__ ); - ncwrap( ncmpi_put_vara_double_all( ncid , uwnd_varid , st3 , ct3 , uwnd .createHostCopy().data() ) , __LINE__ ); - ncwrap( ncmpi_put_vara_double_all( ncid , wwnd_varid , st3 , ct3 , wwnd .createHostCopy().data() ) , __LINE__ ); - ncwrap( ncmpi_put_vara_double_all( ncid , theta_varid , st3 , ct3 , theta.createHostCopy().data() ) , __LINE__ ); - - //Only the main process needs to write the elapsed time - //Begin "independent" write mode - ncwrap( ncmpi_begin_indep_data(ncid) , __LINE__ ); - //write elapsed time to file + // Write the grid data to file with all the processes writing collectively + st3[0] = num_out; + st3[1] = k_beg; + st3[2] = i_beg; + ct3[0] = 1; + ct3[1] = nz; + ct3[2] = nx; + ncwrap(ncmpi_put_vara_double_all(ncid, dens_varid, st3, ct3, + dens.createHostCopy().data()), + __LINE__); + ncwrap(ncmpi_put_vara_double_all(ncid, uwnd_varid, st3, ct3, + uwnd.createHostCopy().data()), + __LINE__); + ncwrap(ncmpi_put_vara_double_all(ncid, wwnd_varid, st3, ct3, + wwnd.createHostCopy().data()), + __LINE__); + ncwrap(ncmpi_put_vara_double_all(ncid, theta_varid, st3, ct3, + theta.createHostCopy().data()), + __LINE__); + + // Only the main process needs to write the elapsed time + // Begin "independent" write mode + ncwrap(ncmpi_begin_indep_data(ncid), __LINE__); + // write elapsed time to file if (mainproc) { st1[0] = num_out; ct1[0] = 1; double etimearr[1]; - etimearr[0] = etime; ncwrap( ncmpi_put_vara_double( ncid , 
t_varid , st1 , ct1 , etimearr ) , __LINE__ ); + etimearr[0] = etime; + ncwrap(ncmpi_put_vara_double(ncid, t_varid, st1, ct1, etimearr), __LINE__); } - //End "independent" write mode - ncwrap( ncmpi_end_indep_data(ncid) , __LINE__ ); + // End "independent" write mode + ncwrap(ncmpi_end_indep_data(ncid), __LINE__); - //Close the file - ncwrap( ncmpi_close(ncid) , __LINE__ ); + // Close the file + ncwrap(ncmpi_close(ncid), __LINE__); - //Increment the number of outputs + // Increment the number of outputs num_out = num_out + 1; } - -//Error reporting routine for the PNetCDF I/O -void ncwrap( int ierr , int line ) { +// Error reporting routine for the PNetCDF I/O +void ncwrap(int ierr, int line) { if (ierr != NC_NOERR) { printf("NetCDF Error at line: %d\n", line); - printf("%s\n",ncmpi_strerror(ierr)); + printf("%s\n", ncmpi_strerror(ierr)); exit(-1); } } +void finalize() {} -void finalize() { -} - - -//Compute reduced quantities for error checking without resorting to the "ncdiff" tool -void reductions( realConst3d state, double &mass , double &te , Fixed_data const &fixed_data ) { - auto &nx = fixed_data.nx ; - auto &nz = fixed_data.nz ; - auto &hy_dens_cell = fixed_data.hy_dens_cell ; +// Compute reduced quantities for error checking without resorting to the +// "ncdiff" tool +void reductions(realConst3d state, double &mass, double &te, + Fixed_data const &fixed_data) { + auto &nx = fixed_data.nx; + auto &nz = fixed_data.nz; + auto &hy_dens_cell = fixed_data.hy_dens_cell; auto &hy_dens_theta_cell = fixed_data.hy_dens_theta_cell; - doub2d mass2d("mass2d",nz,nx); - doub2d te2d ("te2d ",nz,nx); + doub2d mass2d("mass2d", nz, nx); + doub2d te2d("te2d ", nz, nx); // for (k=0; k(nz,nx) , KOKKOS_LAMBDA (int k, int i) { - double r = state(ID_DENS,hs+k,hs+i) + hy_dens_cell(hs+k); // Density - double u = state(ID_UMOM,hs+k,hs+i) / r; // U-wind - double w = state(ID_WMOM,hs+k,hs+i) / r; // W-wind - double th = ( state(ID_RHOT,hs+k,hs+i) + hy_dens_theta_cell(hs+k) ) / r; // Potential Temperature (theta) - double p = C0*pow(r*th,gamm); // Pressure - double t = th / pow(p0/p,rd/cp); // Temperature - double ke = r*(u*u+w*w); // Kinetic Energy - double ie = r*cv*t; // Internal Energy - mass2d(k,i) = r *dx*dz; // Accumulate domain mass - te2d (k,i) = (ke + ie)*dx*dz; // Accumulate domain total energy - }); - mass = yakl::intrinsics::sum( mass2d ); - te = yakl::intrinsics::sum( te2d ); + parallel_for( + SimpleBounds<2>(nz, nx), KOKKOS_LAMBDA(int k, int i) { + double r = + state(ID_DENS, hs + k, hs + i) + hy_dens_cell(hs + k); // Density + double u = state(ID_UMOM, hs + k, hs + i) / r; // U-wind + double w = state(ID_WMOM, hs + k, hs + i) / r; // W-wind + double th = + (state(ID_RHOT, hs + k, hs + i) + hy_dens_theta_cell(hs + k)) / + r; // Potential Temperature (theta) + double p = C0 * pow(r * th, gamm); // Pressure + double t = th / pow(p0 / p, rd / cp); // Temperature + double ke = r * (u * u + w * w); // Kinetic Energy + double ie = r * cv * t; // Internal Energy + mass2d(k, i) = r * dx * dz; // Accumulate domain mass + te2d(k, i) = (ke + ie) * dx * dz; // Accumulate domain total energy + }); + mass = yakl::intrinsics::sum(mass2d); + te = yakl::intrinsics::sum(te2d); double glob[2], loc[2]; loc[0] = mass; loc[1] = te; - int ierr = MPI_Allreduce(loc,glob,2,MPI_DOUBLE,MPI_SUM,MPI_COMM_WORLD); + int ierr = MPI_Allreduce(loc, glob, 2, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); mass = glob[0]; - te = glob[1]; + te = glob[1]; } - - diff --git a/cpp_yakl/miniWeather_serial.cpp b/cpp_yakl/miniWeather_serial.cpp index 
2107c304..a003f68d 100644 --- a/cpp_yakl/miniWeather_serial.cpp +++ b/cpp_yakl/miniWeather_serial.cpp @@ -2,137 +2,167 @@ ////////////////////////////////////////////////////////////////////////////////////////// // miniWeather // Author: Matt Norman , Oak Ridge National Laboratory -// This code simulates dry, stratified, compressible, non-hydrostatic fluid flows -// For documentation, please see the attached documentation in the "documentation" folder +// This code simulates dry, stratified, compressible, non-hydrostatic fluid +// flows For documentation, please see the attached documentation in the +// "documentation" folder // ////////////////////////////////////////////////////////////////////////////////////////// -#include -#include -#include -#include #include "const.h" #include "pnetcdf.h" #include +#include +#include +#include +#include -// We're going to define all arrays on the host because this doesn't use parallel_for -typedef yakl::Array real1d; -typedef yakl::Array real2d; -typedef yakl::Array real3d; -typedef yakl::Array doub1d; -typedef yakl::Array doub2d; -typedef yakl::Array doub3d; - -typedef yakl::Array realConst1d; -typedef yakl::Array realConst2d; -typedef yakl::Array realConst3d; -typedef yakl::Array doubConst1d; -typedef yakl::Array doubConst2d; -typedef yakl::Array doubConst3d; +// We're going to define all arrays on the host because this doesn't use +// parallel_for +typedef yakl::Array real1d; +typedef yakl::Array real2d; +typedef yakl::Array real3d; +typedef yakl::Array doub1d; +typedef yakl::Array doub2d; +typedef yakl::Array doub3d; + +typedef yakl::Array realConst1d; +typedef yakl::Array realConst2d; +typedef yakl::Array realConst3d; +typedef yakl::Array doubConst1d; +typedef yakl::Array doubConst2d; +typedef yakl::Array doubConst3d; /////////////////////////////////////////////////////////////////////////////////////// -// Variables that are initialized but remain static over the course of the simulation +// Variables that are initialized but remain static over the course of the +// simulation /////////////////////////////////////////////////////////////////////////////////////// struct Fixed_data { - int nx, nz; //Number of local grid cells in the x- and z- dimensions for this MPI task - int i_beg, k_beg; //beginning index in the x- and z-directions for this MPI task - int nranks, myrank; //Number of MPI ranks and my rank id - int left_rank, right_rank; //MPI Rank IDs that exist to my left and right in the global domain - int mainproc; //Am I the main process (rank == 0)? - realConst1d hy_dens_cell; //hydrostatic density (vert cell avgs). Dimensions: (1-hs:nz+hs) - realConst1d hy_dens_theta_cell; //hydrostatic rho*t (vert cell avgs). Dimensions: (1-hs:nz+hs) - realConst1d hy_dens_int; //hydrostatic density (vert cell interf). Dimensions: (1:nz+1) - realConst1d hy_dens_theta_int; //hydrostatic rho*t (vert cell interf). Dimensions: (1:nz+1) - realConst1d hy_pressure_int; //hydrostatic press (vert cell interf). Dimensions: (1:nz+1) + int nx, nz; // Number of local grid cells in the x- and z- dimensions for this + // MPI task + int i_beg, + k_beg; // beginning index in the x- and z-directions for this MPI task + int nranks, myrank; // Number of MPI ranks and my rank id + int left_rank, right_rank; // MPI Rank IDs that exist to my left and right in + // the global domain + int mainproc; // Am I the main process (rank == 0)? + realConst1d hy_dens_cell; // hydrostatic density (vert cell avgs). 
Dimensions: + // (1-hs:nz+hs) + realConst1d hy_dens_theta_cell; // hydrostatic rho*t (vert cell avgs). + // Dimensions: (1-hs:nz+hs) + realConst1d hy_dens_int; // hydrostatic density (vert cell interf). + // Dimensions: (1:nz+1) + realConst1d hy_dens_theta_int; // hydrostatic rho*t (vert cell interf). + // Dimensions: (1:nz+1) + realConst1d hy_pressure_int; // hydrostatic press (vert cell interf). + // Dimensions: (1:nz+1) }; -//Declaring the functions defined after "main" -void init ( real3d &state , real &dt , Fixed_data &fixed_data ); -void finalize ( ); -void injection ( real x , real z , real &r , real &u , real &w , real &t , real &hr , real &ht ); -void density_current ( real x , real z , real &r , real &u , real &w , real &t , real &hr , real &ht ); -void gravity_waves ( real x , real z , real &r , real &u , real &w , real &t , real &hr , real &ht ); -void thermal ( real x , real z , real &r , real &u , real &w , real &t , real &hr , real &ht ); -void collision ( real x , real z , real &r , real &u , real &w , real &t , real &hr , real &ht ); -void hydro_const_theta ( real z , real &r , real &t ); -void hydro_const_bvfreq ( real z , real bv_freq0 , real &r , real &t ); -real sample_ellipse_cosine( real x , real z , real amp , real x0 , real z0 , real xrad , real zrad ); -void output ( realConst3d state , real etime , int &num_out , Fixed_data const &fixed_data ); -void ncwrap ( int ierr , int line ); -void perform_timestep ( real3d const &state , real dt , int &direction_switch , Fixed_data const &fixed_data ); -void semi_discrete_step ( realConst3d state_init , real3d const &state_forcing , real3d const &state_out , real dt , int dir , Fixed_data const &fixed_data ); -void compute_tendencies_x ( realConst3d state , real3d const &tend , real dt , Fixed_data const &fixed_data ); -void compute_tendencies_z ( realConst3d state , real3d const &tend , real dt , Fixed_data const &fixed_data ); -void set_halo_values_x ( real3d const &state , Fixed_data const &fixed_data ); -void set_halo_values_z ( real3d const &state , Fixed_data const &fixed_data ); -void reductions ( realConst3d state , double &mass , double &te , Fixed_data const &fixed_data ); - +// Declaring the functions defined after "main" +void init(real3d &state, real &dt, Fixed_data &fixed_data); +void finalize(); +void injection(real x, real z, real &r, real &u, real &w, real &t, real &hr, + real &ht); +void density_current(real x, real z, real &r, real &u, real &w, real &t, + real &hr, real &ht); +void gravity_waves(real x, real z, real &r, real &u, real &w, real &t, real &hr, + real &ht); +void thermal(real x, real z, real &r, real &u, real &w, real &t, real &hr, + real &ht); +void collision(real x, real z, real &r, real &u, real &w, real &t, real &hr, + real &ht); +void hydro_const_theta(real z, real &r, real &t); +void hydro_const_bvfreq(real z, real bv_freq0, real &r, real &t); +real sample_ellipse_cosine(real x, real z, real amp, real x0, real z0, + real xrad, real zrad); +void output(realConst3d state, real etime, int &num_out, + Fixed_data const &fixed_data); +void ncwrap(int ierr, int line); +void perform_timestep(real3d const &state, real dt, int &direction_switch, + Fixed_data const &fixed_data); +void semi_discrete_step(realConst3d state_init, real3d const &state_forcing, + real3d const &state_out, real dt, int dir, + Fixed_data const &fixed_data); +void compute_tendencies_x(realConst3d state, real3d const &tend, real dt, + Fixed_data const &fixed_data); +void compute_tendencies_z(realConst3d state, real3d const 
&tend, real dt, + Fixed_data const &fixed_data); +void set_halo_values_x(real3d const &state, Fixed_data const &fixed_data); +void set_halo_values_z(real3d const &state, Fixed_data const &fixed_data); +void reductions(realConst3d state, double &mass, double &te, + Fixed_data const &fixed_data); /////////////////////////////////////////////////////////////////////////////////////// // THE MAIN PROGRAM STARTS HERE /////////////////////////////////////////////////////////////////////////////////////// int main(int argc, char **argv) { - MPI_Init(&argc,&argv); + MPI_Init(&argc, &argv); yakl::init(); { Fixed_data fixed_data; real3d state; - real dt; //Model time step (seconds) + real dt; // Model time step (seconds) // init allocates state - init( state , dt , fixed_data ); + init(state, dt, fixed_data); auto &mainproc = fixed_data.mainproc; - //Initial reductions for mass, kinetic energy, and total energy + // Initial reductions for mass, kinetic energy, and total energy double mass0, te0; - reductions(state,mass0,te0,fixed_data); + reductions(state, mass0, te0, fixed_data); - int num_out = 0; //The number of outputs performed so far - real output_counter = 0; //Helps determine when it's time to do output + int num_out = 0; // The number of outputs performed so far + real output_counter = 0; // Helps determine when it's time to do output real etime = 0; - //Output the initial state + // Output the initial state if (output_freq >= 0) { - output(state,etime,num_out,fixed_data); + output(state, etime, num_out, fixed_data); } - int direction_switch = 1; // Tells dimensionally split which order to take x,z solves + int direction_switch = + 1; // Tells dimensionally split which order to take x,z solves //////////////////////////////////////////////////// // MAIN TIME STEP LOOP //////////////////////////////////////////////////// auto t1 = std::chrono::steady_clock::now(); while (etime < sim_time) { - //If the time step leads to exceeding the simulation time, shorten it for the last step - if (etime + dt > sim_time) { dt = sim_time - etime; } - //Perform a single time step - perform_timestep(state,dt,direction_switch,fixed_data); - //Inform the user - #ifndef NO_INFORM - if (mainproc) { printf( "Elapsed Time: %lf / %lf\n", etime , sim_time ); } - #endif - //Update the elapsed time and output counter + // If the time step leads to exceeding the simulation time, shorten it for + // the last step + if (etime + dt > sim_time) { + dt = sim_time - etime; + } + // Perform a single time step + perform_timestep(state, dt, direction_switch, fixed_data); +// Inform the user +#ifndef NO_INFORM + if (mainproc) { + printf("Elapsed Time: %lf / %lf\n", etime, sim_time); + } +#endif + // Update the elapsed time and output counter etime = etime + dt; output_counter = output_counter + dt; - //If it's time for output, reset the counter, and do output + // If it's time for output, reset the counter, and do output if (output_freq >= 0 && output_counter >= output_freq) { output_counter = output_counter - output_freq; - output(state,etime,num_out,fixed_data); + output(state, etime, num_out, fixed_data); } } auto t2 = std::chrono::steady_clock::now(); if (mainproc) { - std::cout << "CPU Time: " << std::chrono::duration(t2-t1).count() << " sec\n"; + std::cout << "CPU Time: " + << std::chrono::duration(t2 - t1).count() << " sec\n"; } - //Final reductions for mass, kinetic energy, and total energy + // Final reductions for mass, kinetic energy, and total energy double mass, te; - reductions(state,mass,te,fixed_data); + 
reductions(state, mass, te, fixed_data); if (mainproc) { - printf( "d_mass: %le\n" , (mass - mass0)/mass0 ); - printf( "d_te: %le\n" , (te - te0 )/te0 ); + printf("d_mass: %le\n", (mass - mass0) / mass0); + printf("d_te: %le\n", (te - te0) / te0); } finalize(); @@ -141,241 +171,261 @@ int main(int argc, char **argv) { MPI_Finalize(); } +// Performs a single dimensionally split time step using a simple low-storage +// three-stage Runge-Kutta time integrator The dimensional splitting is a +// second-order-accurate alternating Strang splitting in which the order of +// directions is alternated each time step. The Runge-Kutta method used here is +// defined as follows: +// q* = q_n + dt/3 * rhs(q_n) +// q** = q_n + dt/2 * rhs(q* ) +// q_n+1 = q_n + dt/1 * rhs(q**) +void perform_timestep(real3d const &state, real dt, int &direction_switch, + Fixed_data const &fixed_data) { + auto &nx = fixed_data.nx; + auto &nz = fixed_data.nz; + + real3d state_tmp("state_tmp", NUM_VARS, nz + 2 * hs, nx + 2 * hs); -//Performs a single dimensionally split time step using a simple low-storage three-stage Runge-Kutta time integrator -//The dimensional splitting is a second-order-accurate alternating Strang splitting in which the -//order of directions is alternated each time step. -//The Runge-Kutta method used here is defined as follows: -// q* = q_n + dt/3 * rhs(q_n) -// q** = q_n + dt/2 * rhs(q* ) -// q_n+1 = q_n + dt/1 * rhs(q**) -void perform_timestep( real3d const &state , real dt , int &direction_switch , Fixed_data const &fixed_data ) { - auto &nx = fixed_data.nx ; - auto &nz = fixed_data.nz ; - - real3d state_tmp("state_tmp",NUM_VARS,nz+2*hs,nx+2*hs); - if (direction_switch) { - //x-direction first - semi_discrete_step( state , state , state_tmp , dt / 3 , DIR_X , fixed_data ); - semi_discrete_step( state , state_tmp , state_tmp , dt / 2 , DIR_X , fixed_data ); - semi_discrete_step( state , state_tmp , state , dt / 1 , DIR_X , fixed_data ); - //z-direction second - semi_discrete_step( state , state , state_tmp , dt / 3 , DIR_Z , fixed_data ); - semi_discrete_step( state , state_tmp , state_tmp , dt / 2 , DIR_Z , fixed_data ); - semi_discrete_step( state , state_tmp , state , dt / 1 , DIR_Z , fixed_data ); + // x-direction first + semi_discrete_step(state, state, state_tmp, dt / 3, DIR_X, fixed_data); + semi_discrete_step(state, state_tmp, state_tmp, dt / 2, DIR_X, fixed_data); + semi_discrete_step(state, state_tmp, state, dt / 1, DIR_X, fixed_data); + // z-direction second + semi_discrete_step(state, state, state_tmp, dt / 3, DIR_Z, fixed_data); + semi_discrete_step(state, state_tmp, state_tmp, dt / 2, DIR_Z, fixed_data); + semi_discrete_step(state, state_tmp, state, dt / 1, DIR_Z, fixed_data); + } else { + // z-direction second + semi_discrete_step(state, state, state_tmp, dt / 3, DIR_Z, fixed_data); + semi_discrete_step(state, state_tmp, state_tmp, dt / 2, DIR_Z, fixed_data); + semi_discrete_step(state, state_tmp, state, dt / 1, DIR_Z, fixed_data); + // x-direction first + semi_discrete_step(state, state, state_tmp, dt / 3, DIR_X, fixed_data); + semi_discrete_step(state, state_tmp, state_tmp, dt / 2, DIR_X, fixed_data); + semi_discrete_step(state, state_tmp, state, dt / 1, DIR_X, fixed_data); + } + if (direction_switch) { + direction_switch = 0; } else { - //z-direction second - semi_discrete_step( state , state , state_tmp , dt / 3 , DIR_Z , fixed_data ); - semi_discrete_step( state , state_tmp , state_tmp , dt / 2 , DIR_Z , fixed_data ); - semi_discrete_step( state , state_tmp , state , dt / 1 , DIR_Z 
, fixed_data ); - //x-direction first - semi_discrete_step( state , state , state_tmp , dt / 3 , DIR_X , fixed_data ); - semi_discrete_step( state , state_tmp , state_tmp , dt / 2 , DIR_X , fixed_data ); - semi_discrete_step( state , state_tmp , state , dt / 1 , DIR_X , fixed_data ); + direction_switch = 1; } - if (direction_switch) { direction_switch = 0; } else { direction_switch = 1; } } - -//Perform a single semi-discretized step in time with the form: -//state_out = state_init + dt * rhs(state_forcing) -//Meaning the step starts from state_init, computes the rhs using state_forcing, and stores the result in state_out -void semi_discrete_step( realConst3d state_init , real3d const &state_forcing , real3d const &state_out , real dt , int dir , Fixed_data const &fixed_data ) { - auto &nx = fixed_data.nx ; - auto &nz = fixed_data.nz ; - auto &i_beg = fixed_data.i_beg ; - auto &k_beg = fixed_data.k_beg ; - auto &hy_dens_cell = fixed_data.hy_dens_cell ; - - real3d tend("tend",NUM_VARS,nz,nx); - - if (dir == DIR_X) { - //Set the halo values for this MPI task's fluid state in the x-direction +// Perform a single semi-discretized step in time with the form: +// state_out = state_init + dt * rhs(state_forcing) +// Meaning the step starts from state_init, computes the rhs using +// state_forcing, and stores the result in state_out +void semi_discrete_step(realConst3d state_init, real3d const &state_forcing, + real3d const &state_out, real dt, int dir, + Fixed_data const &fixed_data) { + auto &nx = fixed_data.nx; + auto &nz = fixed_data.nz; + auto &i_beg = fixed_data.i_beg; + auto &k_beg = fixed_data.k_beg; + auto &hy_dens_cell = fixed_data.hy_dens_cell; + + real3d tend("tend", NUM_VARS, nz, nx); + + if (dir == DIR_X) { + // Set the halo values for this MPI task's fluid state in the x-direction yakl::timer_start("halo x"); - set_halo_values_x(state_forcing,fixed_data); + set_halo_values_x(state_forcing, fixed_data); yakl::timer_stop("halo x"); - //Compute the time tendencies for the fluid state in the x-direction + // Compute the time tendencies for the fluid state in the x-direction yakl::timer_start("tendencies x"); - compute_tendencies_x(state_forcing,tend,dt,fixed_data); + compute_tendencies_x(state_forcing, tend, dt, fixed_data); yakl::timer_stop("tendencies x"); } else if (dir == DIR_Z) { - //Set the halo values for this MPI task's fluid state in the z-direction + // Set the halo values for this MPI task's fluid state in the z-direction yakl::timer_start("halo z"); - set_halo_values_z(state_forcing,fixed_data); + set_halo_values_z(state_forcing, fixed_data); yakl::timer_stop("halo z"); - //Compute the time tendencies for the fluid state in the z-direction + // Compute the time tendencies for the fluid state in the z-direction yakl::timer_start("tendencies z"); - compute_tendencies_z(state_forcing,tend,dt,fixed_data); + compute_tendencies_z(state_forcing, tend, dt, fixed_data); yakl::timer_stop("tendencies z"); } ///////////////////////////////////////////////// // TODO: MAKE THESE 3 LOOPS A PARALLEL_FOR ///////////////////////////////////////////////// - //Apply the tendencies to the fluid state + // Apply the tendencies to the fluid state yakl::timer_start("apply tendencies"); - for (int ll=0; ll stencil; - SArray d3_vals; - SArray vals; - //Use fourth-order interpolation from four cell averages to compute the value at the interface in question - for (int ll=0; ll stencil; + SArray d3_vals; + SArray vals; + // Use fourth-order interpolation from four cell averages to compute the + // value 
at the interface in question + for (int ll = 0; ll < NUM_VARS; ll++) { + for (int s = 0; s < sten_size; s++) { + stencil(s) = state(ll, hs + k, i + s); } - //Fourth-order-accurate interpolation of the state - vals(ll) = -stencil(0)/12 + 7*stencil(1)/12 + 7*stencil(2)/12 - stencil(3)/12; - //First-order-accurate interpolation of the third spatial derivative of the state (for artificial viscosity) - d3_vals(ll) = -stencil(0) + 3*stencil(1) - 3*stencil(2) + stencil(3); + // Fourth-order-accurate interpolation of the state + vals(ll) = -stencil(0) / 12 + 7 * stencil(1) / 12 + + 7 * stencil(2) / 12 - stencil(3) / 12; + // First-order-accurate interpolation of the third spatial derivative of + // the state (for artificial viscosity) + d3_vals(ll) = + -stencil(0) + 3 * stencil(1) - 3 * stencil(2) + stencil(3); } - //Compute density, u-wind, w-wind, potential temperature, and pressure (r,u,w,t,p respectively) - real r = vals(ID_DENS) + hy_dens_cell(hs+k); + // Compute density, u-wind, w-wind, potential temperature, and pressure + // (r,u,w,t,p respectively) + real r = vals(ID_DENS) + hy_dens_cell(hs + k); real u = vals(ID_UMOM) / r; real w = vals(ID_WMOM) / r; - real t = ( vals(ID_RHOT) + hy_dens_theta_cell(hs+k) ) / r; - real p = C0*pow((r*t),gamm); - - //Compute the flux vector - flux(ID_DENS,k,i) = r*u - hv_coef*d3_vals(ID_DENS); - flux(ID_UMOM,k,i) = r*u*u+p - hv_coef*d3_vals(ID_UMOM); - flux(ID_WMOM,k,i) = r*u*w - hv_coef*d3_vals(ID_WMOM); - flux(ID_RHOT,k,i) = r*u*t - hv_coef*d3_vals(ID_RHOT); + real t = (vals(ID_RHOT) + hy_dens_theta_cell(hs + k)) / r; + real p = C0 * pow((r * t), gamm); + + // Compute the flux vector + flux(ID_DENS, k, i) = r * u - hv_coef * d3_vals(ID_DENS); + flux(ID_UMOM, k, i) = r * u * u + p - hv_coef * d3_vals(ID_UMOM); + flux(ID_WMOM, k, i) = r * u * w - hv_coef * d3_vals(ID_WMOM); + flux(ID_RHOT, k, i) = r * u * t - hv_coef * d3_vals(ID_RHOT); } } ///////////////////////////////////////////////// // TODO: MAKE THESE 3 LOOPS A PARALLEL_FOR ///////////////////////////////////////////////// - //Use the fluxes to compute tendencies for each cell - for (int ll=0; ll stencil; - SArray d3_vals; - SArray vals; - //Use fourth-order interpolation from four cell averages to compute the value at the interface in question - for (int ll=0; ll stencil; + SArray d3_vals; + SArray vals; + // Use fourth-order interpolation from four cell averages to compute the + // value at the interface in question + for (int ll = 0; ll < NUM_VARS; ll++) { + for (int s = 0; s < sten_size; s++) { + stencil(s) = state(ll, k + s, hs + i); } - //Fourth-order-accurate interpolation of the state - vals(ll) = -stencil(0)/12 + 7*stencil(1)/12 + 7*stencil(2)/12 - stencil(3)/12; - //First-order-accurate interpolation of the third spatial derivative of the state - d3_vals(ll) = -stencil(0) + 3*stencil(1) - 3*stencil(2) + stencil(3); + // Fourth-order-accurate interpolation of the state + vals(ll) = -stencil(0) / 12 + 7 * stencil(1) / 12 + + 7 * stencil(2) / 12 - stencil(3) / 12; + // First-order-accurate interpolation of the third spatial derivative of + // the state + d3_vals(ll) = + -stencil(0) + 3 * stencil(1) - 3 * stencil(2) + stencil(3); } - //Compute density, u-wind, w-wind, potential temperature, and pressure (r,u,w,t,p respectively) + // Compute density, u-wind, w-wind, potential temperature, and pressure + // (r,u,w,t,p respectively) real r = vals(ID_DENS) + hy_dens_int(k); real u = vals(ID_UMOM) / r; real w = vals(ID_WMOM) / r; - real t = ( vals(ID_RHOT) + hy_dens_theta_int(k) ) / r; - real p = 
C0*pow((r*t),gamm) - hy_pressure_int(k); + real t = (vals(ID_RHOT) + hy_dens_theta_int(k)) / r; + real p = C0 * pow((r * t), gamm) - hy_pressure_int(k); if (k == 0 || k == nz) { - w = 0; + w = 0; d3_vals(ID_DENS) = 0; } - //Compute the flux vector with hyperviscosity - flux(ID_DENS,k,i) = r*w - hv_coef*d3_vals(ID_DENS); - flux(ID_UMOM,k,i) = r*w*u - hv_coef*d3_vals(ID_UMOM); - flux(ID_WMOM,k,i) = r*w*w+p - hv_coef*d3_vals(ID_WMOM); - flux(ID_RHOT,k,i) = r*w*t - hv_coef*d3_vals(ID_RHOT); + // Compute the flux vector with hyperviscosity + flux(ID_DENS, k, i) = r * w - hv_coef * d3_vals(ID_DENS); + flux(ID_UMOM, k, i) = r * w * u - hv_coef * d3_vals(ID_UMOM); + flux(ID_WMOM, k, i) = r * w * w + p - hv_coef * d3_vals(ID_WMOM); + flux(ID_RHOT, k, i) = r * w * t - hv_coef * d3_vals(ID_RHOT); } } - //Use the fluxes to compute tendencies for each cell + // Use the fluxes to compute tendencies for each cell ///////////////////////////////////////////////// // TODO: MAKE THESE 3 LOOPS A PARALLEL_FOR ///////////////////////////////////////////////// - for (int ll=0; ll qpoints; - SArray qweights; + SArray qpoints; + SArray qweights; qpoints(0) = 0.112701665379258311482073460022; qpoints(1) = 0.500000000000000000000000000000; @@ -518,333 +574,402 @@ void init( real3d &state , real &dt , Fixed_data &fixed_data ) { ///////////////////////////////////////////////// // TODO: MAKE THESE 2 LOOPS A PARALLEL_FOR ///////////////////////////////////////////////// - for (int k=0; k
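
[Editor's note: illustrative sketch only, not part of the patch.] The comment
block above perform_timestep() (in both miniWeather_mpi.cpp and
miniWeather_serial.cpp) describes the low-storage three-stage Runge-Kutta
update

    q*     = q_n + dt/3 * rhs(q_n)
    q**    = q_n + dt/2 * rhs(q*)
    q_n+1  = q_n + dt/1 * rhs(q**)

The short, self-contained C++ sketch below applies the same stage sequence to
the scalar ODE dq/dt = -q so the pattern can be read in isolation; the names
rhs() and rk3_step() are hypothetical and exist only for this illustration,
they are not part of the miniWeather API.

    #include <cstdio>

    // Right-hand side of the model problem dq/dt = -q (illustration only).
    static double rhs(double q) { return -q; }

    // One low-storage three-stage RK step, mirroring perform_timestep():
    //   q*     = q_n + dt/3 * rhs(q_n)
    //   q**    = q_n + dt/2 * rhs(q*)
    //   q_n+1  = q_n + dt/1 * rhs(q**)
    static double rk3_step(double q_n, double dt) {
      double q_star  = q_n + dt / 3.0 * rhs(q_n);     // first stage  (q*)
      double q_star2 = q_n + dt / 2.0 * rhs(q_star);  // second stage (q**)
      return           q_n + dt / 1.0 * rhs(q_star2); // final stage  (q_n+1)
    }

    int main() {
      double q = 1.0, dt = 0.1;
      for (int n = 0; n < 10; n++) { q = rk3_step(q, dt); }
      // Exact solution of dq/dt = -q with q(0) = 1 is exp(-1) ~= 0.367879 at t = 1.
      std::printf("q(t=1) = %.6f\n", q);
      return 0;
    }

In perform_timestep(), the three semi_discrete_step() calls per direction play
the role of the three assignments in rk3_step(): state_tmp holds q* and q**,
and the final call writes q_n+1 back into state. The sequence runs once for
the x solve and once for the z solve, in alternating order each step, which is
the second-order alternating Strang splitting the patch's comments describe.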