Skip to content

Commit c933e72

Browse files
authored
Delete cuTensor (#965)
1 parent 31b52be, commit c933e72

File tree

6 files changed

+20
-80
lines changed

6 files changed

+20
-80
lines changed

CMakeLists.txt

Lines changed: 0 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -479,17 +479,6 @@ function(MFC_SETUP_TARGET)
479479
"-foffload-options=-lgfortran\ -lm"
480480
"-fno-exceptions")
481481
elseif(CMAKE_Fortran_COMPILER_ID STREQUAL "NVHPC" OR CMAKE_Fortran_COMPILER_ID STREQUAL "PGI")
482-
find_package(cuTENSOR)
483-
if (NOT cuTENSOR_FOUND)
484-
message(WARNING
485-
"Failed to locate the NVIDIA cuTENSOR library. MFC will be "
486-
"built without support for it, disallowing the use of "
487-
"cu_tensor=T. This can result in degraded performance.")
488-
else()
489-
target_link_libraries (${a_target} PRIVATE cuTENSOR::cuTENSOR)
490-
target_compile_definitions(${a_target} PRIVATE MFC_cuTENSOR)
491-
endif()
492-
493482
foreach (cc ${MFC_CUDA_CC})
494483
target_compile_options(${a_target}
495484
PRIVATE -gpu=cc${cc}

src/simulation/m_checker.fpp

Lines changed: 0 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -57,11 +57,6 @@ contains
5757
#if !defined(MFC_OpenACC) && !(defined(__PGI) || defined(_CRAYFTN))
5858
@:PROHIBIT(rdma_mpi, "Unsupported value of rdma_mpi for the current compiler")
5959
#endif
60-
61-
#ifndef MFC_cuTENSOR
62-
@:PROHIBIT(cu_tensor, "MFC was not built with the NVIDIA cuTENSOR library")
63-
#endif
64-
6560
end subroutine s_check_inputs_compilers
6661

6762
impure subroutine s_check_inputs_igr

src/simulation/m_global_parameters.fpp

Lines changed: 1 addition & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -165,7 +165,6 @@ module m_global_parameters
165165
integer :: hyper_model !< hyperelasticity solver algorithm
166166
logical :: elasticity !< elasticity modeling, true for hyper or hypo
167167
logical, parameter :: chemistry = .${chemistry}$. !< Chemistry modeling
168-
logical :: cu_tensor
169168
logical :: shear_stress !< Shear stresses
170169
logical :: bulk_stress !< Bulk stresses
171170
logical :: cont_damage !< Continuum damage modeling
@@ -497,7 +496,7 @@ module m_global_parameters
497496
real(wp) :: mytime !< Current simulation time
498497
real(wp) :: finaltime !< Final simulation time
499498

500-
logical :: weno_flat, riemann_flat, rdma_mpi
499+
logical :: rdma_mpi
501500

502501
type(pres_field), allocatable, dimension(:) :: pb_ts
503502

@@ -589,8 +588,6 @@ contains
589588
hyper_model = dflt_int
590589
b_size = dflt_int
591590
tensor_size = dflt_int
592-
weno_flat = .true.
593-
riemann_flat = .true.
594591
rdma_mpi = .false.
595592
shear_stress = .false.
596593
bulk_stress = .false.
@@ -698,9 +695,6 @@ contains
698695
sigma = dflt_real
699696
surface_tension = .false.
700697
701-
! Cuda aware MPI
702-
cu_tensor = .false.
703-
704698
bodyForces = .false.
705699
bf_x = .false.; bf_y = .false.; bf_z = .false.
706700
!< amplitude, frequency, and phase shift sinusoid in each direction

src/simulation/m_mpi_proxy.fpp

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -104,7 +104,7 @@ contains
104104
#:endfor
105105
106106
#:for VAR in [ 'run_time_info','cyl_coord', 'mpp_lim', &
107-
& 'mp_weno', 'rdma_mpi', 'weno_flat', 'riemann_flat', &
107+
& 'mp_weno', 'rdma_mpi', 'powell', 'cont_damage', 'bc_io', &
108108
& 'weno_Re_flux', 'alt_soundspeed', 'null_weights', 'mixture_err', &
109109
& 'parallel_io', 'hypoelasticity', 'bubbles_euler', 'polytropic', &
110110
& 'polydisperse', 'qbmm', 'acoustic_source', 'probe_wrt', 'integral_wrt', &
@@ -115,7 +115,7 @@ contains
115115
& 'bc_z%grcbc_in', 'bc_z%grcbc_out', 'bc_z%grcbc_vel_out', &
116116
& 'cfl_adap_dt', 'cfl_const_dt', 'cfl_dt', 'surface_tension', &
117117
& 'shear_stress', 'bulk_stress', 'bubbles_lagrange', &
118-
& 'hyperelasticity', 'bc_io', 'powell', 'cont_damage' ]
118+
& 'hyperelasticity']
119119
call MPI_BCAST(${VAR}$, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr)
120120
#:endfor
121121

src/simulation/m_start_up.fpp

Lines changed: 2 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -146,9 +146,8 @@ contains
146146
! Namelist of the global parameters which may be specified by user
147147
namelist /user_inputs/ case_dir, run_time_info, m, n, p, dt, &
148148
t_step_start, t_step_stop, t_step_save, t_step_print, &
149-
model_eqns, mpp_lim, time_stepper, weno_eps, weno_flat, &
150-
riemann_flat, rdma_mpi, cu_tensor, &
151-
teno_CT, mp_weno, weno_avg, &
149+
model_eqns, mpp_lim, time_stepper, weno_eps, &
150+
rdma_mpi, teno_CT, mp_weno, weno_avg, &
152151
riemann_solver, low_Mach, wave_speeds, avg_state, &
153152
bc_x, bc_y, bc_z, &
154153
x_a, y_a, z_a, x_b, y_b, z_b, &

src/simulation/m_weno.fpp

Lines changed: 15 additions & 52 deletions
Original file line number | Diff line number | Diff line change
@@ -1142,69 +1142,32 @@ contains
11421142
if (n == 0) return
11431143

11441144
if (weno_dir == 2) then
1145-
#if MFC_cuTENSOR
1146-
if (cu_tensor) then
1147-
if (p == 0) then
1148-
block
1149-
use CuTensorEx
1150-
1151-
#:call GPU_HOST_DATA(use_device='[v_rs_ws_x, v_rs_ws_y]')
1152-
v_rs_ws_y = reshape(v_rs_ws_x, shape=[n + 1 + 2*buff_size, m + 2*buff_size + 1, p + 1, sys_size], order=[2, 1, 3, 4])
1153-
#:endcall GPU_HOST_DATA
1154-
end block
1155-
else
1156-
block
1157-
use CuTensorEx
1158-
1159-
#:call GPU_HOST_DATA(use_device='[v_rs_ws_x, v_rs_ws_y]')
1160-
v_rs_ws_y = reshape(v_rs_ws_x, shape=[n + 1 + 2*buff_size, m + 2*buff_size + 1, p + 1 + 2*buff_size, sys_size], order=[2, 1, 3, 4])
1161-
#:endcall GPU_HOST_DATA
1162-
end block
1163-
end if
1164-
else
1165-
#endif
1166-
$:GPU_PARALLEL_LOOP(collapse=4)
1167-
do j = 1, v_size
1168-
do q = is3_weno%beg, is3_weno%end
1169-
do l = is2_weno%beg, is2_weno%end
1170-
do k = is1_weno%beg - weno_polyn, is1_weno%end + weno_polyn
1171-
v_rs_ws_y(k, l, q, j) = v_vf(j)%sf(l, k, q)
1172-
end do
1145+
$:GPU_PARALLEL_LOOP(collapse=4)
1146+
do j = 1, v_size
1147+
do q = is3_weno%beg, is3_weno%end
1148+
do l = is2_weno%beg, is2_weno%end
1149+
do k = is1_weno%beg - weno_polyn, is1_weno%end + weno_polyn
1150+
v_rs_ws_y(k, l, q, j) = v_vf(j)%sf(l, k, q)
11731151
end do
11741152
end do
11751153
end do
1176-
#if MFC_cuTENSOR
1177-
end if
1178-
#endif
1154+
end do
11791155
end if
11801156

11811157
! Reshaping/Projecting onto Characteristic Fields in z-direction
11821158
if (p == 0) return
1159+
11831160
if (weno_dir == 3) then
1184-
#if MFC_cuTENSOR
1185-
if (cu_tensor) then
1186-
block
1187-
use CuTensorEx
1188-
1189-
#:call GPU_HOST_DATA(use_device='[v_rs_ws_x, v_rs_ws_z]')
1190-
v_rs_ws_z = reshape(v_rs_ws_x, shape=[p + 1 + 2*buff_size, n + 2*buff_size + 1, m + 2*buff_size + 1, sys_size], order=[3, 2, 1, 4])
1191-
#:endcall
1192-
end block
1193-
else
1194-
#endif
1195-
$:GPU_PARALLEL_LOOP(collapse=4)
1196-
do j = 1, v_size
1197-
do q = is3_weno%beg, is3_weno%end
1198-
do l = is2_weno%beg, is2_weno%end
1199-
do k = is1_weno%beg - weno_polyn, is1_weno%end + weno_polyn
1200-
v_rs_ws_z(k, l, q, j) = v_vf(j)%sf(q, l, k)
1201-
end do
1161+
$:GPU_PARALLEL_LOOP(collapse=4)
1162+
do j = 1, v_size
1163+
do q = is3_weno%beg, is3_weno%end
1164+
do l = is2_weno%beg, is2_weno%end
1165+
do k = is1_weno%beg - weno_polyn, is1_weno%end + weno_polyn
1166+
v_rs_ws_z(k, l, q, j) = v_vf(j)%sf(q, l, k)
12021167
end do
12031168
end do
12041169
end do
1205-
#if MFC_cuTENSOR
1206-
end if
1207-
#endif
1170+
end do
12081171
end if
12091172

12101173
end subroutine s_initialize_weno

0 commit comments

Comments
 (0)