diff --git a/requirements.txt b/requirements.txt index 79dbc5f..b046b1b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,6 +5,8 @@ numpy pygrib scipy urllib3 +xarray # for the HRES netcdf model level to pressure level conversion +netcdf4 # for the compression when save the netcdf file #netcdf4 #for MERRA, which is currently not supported #pyhdf #for MERRA, which is currently not supported # for packaging and installation diff --git a/src/pyaps3/__init__.py b/src/pyaps3/__init__.py index eb78af7..04896fe 100644 --- a/src/pyaps3/__init__.py +++ b/src/pyaps3/__init__.py @@ -22,8 +22,9 @@ # top-level functions from pyaps3.objects import PyAPS from pyaps3.autoget import ECMWFdload, MERRAdload, NARRdload +from pyaps3.ecmwfml2pl import ECMWFml2pl -__all__ = ['__version__', 'PyAPS', 'ECMWFdload', 'MERRAdload', 'NARRdload'] +__all__ = ['__version__', 'PyAPS', 'ECMWFdload', 'MERRAdload', 'NARRdload', 'ECMWFml2pl'] ############################################################ # Program is part of PyAPS # diff --git a/src/pyaps3/ecmwfml2pl.py b/src/pyaps3/ecmwfml2pl.py new file mode 100644 index 0000000..b30704a --- /dev/null +++ b/src/pyaps3/ecmwfml2pl.py @@ -0,0 +1,259 @@ +# **************************** LICENSE START *********************************** +# +# Copyright 2022 ECMWF. This software is distributed under the terms +# of the Apache License version 2.0. In applying this license, ECMWF does not +# waive the privileges and immunities granted to it by virtue of its status as +# an Intergovernmental Organization or submit itself to any jurisdiction. +# +# ***************************** LICENSE END ************************************ + + +import xarray as xr +import numpy as np +import sys +import os + +# The list of A and B coefficients defining the model levels +PV_COEFF_A = np.array([ + 0.0, 2.000365, 3.102241, 4.666084, 6.827977, 9.746966, 13.605424, 18.608931, 24.985718, 32.98571, + 42.879242, 54.955463, 69.520576, 86.895882, 107.415741, 131.425507, 159.279404, 191.338562, 227.968948, + 269.539581, 316.420746, 368.982361, 427.592499, 492.616028, 564.413452, 643.339905, 729.744141, 823.967834, + 926.34491, 1037.201172, 1156.853638, 1285.610352, 1423.770142, 1571.622925, 1729.448975, 1897.519287, + 2076.095947, 2265.431641, 2465.770508, 2677.348145, 2900.391357, 3135.119385, 3381.743652, 3640.468262, + 3911.490479, 4194.930664, 4490.817383, 4799.149414, 5119.89502, 5452.990723, 5798.344727, 6156.074219, + 6526.946777, 6911.870605, 7311.869141, 7727.412109, 8159.354004, 8608.525391, 9076.400391, 9562.682617, + 10065.97852, 10584.63184, 11116.66211, 11660.06738, 12211.54785, 12766.87305, 13324.66895, 13881.33106, + 14432.13965, 14975.61523, 15508.25684, 16026.11523, 16527.32227, 17008.78906, 17467.61328, 17901.62109, + 18308.43359, 18685.71875, 19031.28906, 19343.51172, 19620.04297, 19859.39063, 20059.93164, 20219.66406, + 20337.86328, 20412.30859, 20442.07813, 20425.71875, 20361.81641, 20249.51172, 20087.08594, 19874.02539, + 19608.57227, 19290.22656, 18917.46094, 18489.70703, 18006.92578, 17471.83984, 16888.6875, 16262.04688, + 15596.69531, 14898.45313, 14173.32422, 13427.76953, 12668.25781, 11901.33984, 11133.30469, 10370.17578, + 9617.515625, 8880.453125, 8163.375, 7470.34375, 6804.421875, 6168.53125, 5564.382813, 4993.796875, 4457.375, + 3955.960938, 3489.234375, 3057.265625, 2659.140625, 2294.242188, 1961.5, 1659.476563, 1387.546875, 1143.25, + 926.507813, 734.992188, 568.0625, 424.414063, 302.476563, 202.484375, 122.101563, 62.78125, 22.835938, + 3.757813, 0.0, 0.0]) + +PV_COEFF_B = np.array([ + 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, + 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, + 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 7e-06, 2.4e-05, 5.9e-05, + 0.000112, 0.000199, 0.00034, 0.000562, 0.00089, 0.001353, 0.001992, 0.002857, 0.003971, 0.005378, 0.007133, + 0.009261, 0.011806, 0.014816, 0.018318, 0.022355, 0.026964, 0.032176, 0.038026, 0.044548, 0.051773, 0.059728, + 0.068448, 0.077958, 0.088286, 0.099462, 0.111505, 0.124448, 0.138313, 0.153125, 0.16891, 0.185689, 0.203491, + 0.222333, 0.242244, 0.263242, 0.285354, 0.308598, 0.332939, 0.358254, 0.384363, 0.411125, 0.438391, 0.466003, + 0.4938, 0.521619, 0.549301, 0.576692, 0.603648, 0.630036, 0.655736, 0.680643, 0.704669, 0.727739, 0.749797, + 0.770798, 0.790717, 0.809536, 0.827256, 0.843881, 0.859432, 0.873929, 0.887408, 0.8999, 0.911448, 0.922096, + 0.931881, 0.94086, 0.949064, 0.95655, 0.963352, 0.969513, 0.975078, 0.980072, 0.984542, 0.9885, 0.991984, 0.995003, + 0.99763, 1.0]) + +def get_input_variable_list(fin): + ds = xr.open_dataset(fin) + var_list = list(ds.keys()) + ds = None + var_list_unique = list(set(var_list)) + if 'lnsp' not in var_list_unique: + print("Error - lnsp variable missing from input file -exiting") + sys.exit() + if len(var_list_unique) < 2: + print("Error - Data variable missing from input file -exiting") + sys.exit() + return var_list_unique + +def check_requested_levels(plevs): + check_lev = True + if len(plevs) > 1: + error_msg = "Error - only specify 1 input pressure level to interpolate to" + else: + for lev in plevs: + if lev < 0 or lev > 110000 : + check_lev = False + error_msg = "Error - negative values and large positive values for pressure are not allowed -exiting" + if check_lev == False: + print(error_msg) + sys.exit() + return check_lev + +def vertical_interpolate(vcoord_data, interp_var, interp_level): + """A function to interpolate sounding data from each station to + every millibar. Assumes a log-linear relationship. + + Input + ----- + vcoord_data : A 1D array of vertical level values (e.g. from ERA5 pressure at model levels at a point) + interp_var : A 1D array of the variable to be interpolated to the pressure level + interp_level : A 1D array containing the vertical level to interpolate to + + Return + ------ + interp_data : A 1D array that contains the interpolated variable on the interp_level + """ + + inp_vals = [] + for l in interp_level: + # Set NaN for the pressure out of the range + if l < np.min(vcoord_data): + ip = np.NAN + elif l > np.max(vcoord_data): + ip = np.NAN + else: + # Make vertical coordinate data and grid level log variables + lnp = np.log(vcoord_data) + lnp_interval = np.log(l) + # Use numpy to interpolate from observed levels to grid levels + ip = np.interp(lnp_interval, lnp, interp_var) + inp_vals.append(ip) + return inp_vals + +def get_ph_levs(sp, level): + '''Return the presure at a given level and the next''' + ph_lev = PV_COEFF_A[level - 1] + (PV_COEFF_B[level - 1] * sp) + ph_levplusone = PV_COEFF_A[level] + (PV_COEFF_B[level] * sp) + return ph_lev, ph_levplusone + +def compute_z_level(lev, sp, t_level, q_level, z_h): + '''Compute z at half & full level for the given level, based on t/q/sp''' + # compute moist temperature + t = (t_level * (1. + 0.609133 * q_level)).copy() + + # compute the pressures (on half-levels) + ph_lev, ph_levplusone = get_ph_levs(sp, lev) + + if lev == 1: + dlog_p = np.log(ph_levplusone / 0.1) + alpha = np.log(2) + else: + dlog_p = np.log(ph_levplusone / ph_lev) + alpha = 1. - ((ph_lev / (ph_levplusone - ph_lev)) * dlog_p) + + # R_D = 287.06 + t = t * 287.06 + + # z_f is the geopotential of this full level + # integrate from previous (lower) half-level z_h to the + # full level + z_f = z_h + (t * alpha) + + # z_h is the geopotential of 'half-levels' + # integrate z_h to next half level + z_h = z_h + (t * dlog_p) + + return z_h, z_f + +def calculate_pressure_on_model_levels(sp): + # Get the a and b coefficients from the pv array to calculate the model level pressure + p_half=[] + for i in range(len(PV_COEFF_A)): + p_half.append(PV_COEFF_A[i] + PV_COEFF_B[i] * sp) + p_ml=[] + for hybrid in range(len(p_half) - 1): + p_ml.append((p_half[hybrid + 1] + p_half[hybrid]) / 2.0) + return np.stack(p_ml) + + +def calculate_interpolated_pressure_field(data_var_on_ml, data_p_on_ml,plevs): + nlevs = len(data_var_on_ml.level) + p_array = np.stack(data_p_on_ml, axis=2).flatten() + + # Flatten the data array to enable faster processing + var_array = np.stack(data_var_on_ml, axis=2).flatten() + no_grid_points = int(len(var_array)/nlevs) + nlats_values = data_var_on_ml.coords['latitude'] + nlons_values = data_var_on_ml.coords['longitude'] + nlats = len(nlats_values) + nlons = len(nlons_values) + + # Iterate over the data, selecting one vertical profile at a time + count = 0 + profile_count = 0 + interpolated_values=[] + for point in range(no_grid_points): + offset = count*nlevs + var_profile = var_array[offset:offset+nlevs] + p_profile = p_array[offset:offset+nlevs] + interpolated_values.append(vertical_interpolate(p_profile, var_profile, plevs)) + profile_count += len(p_profile) + count = count + 1 + interpolated_field=np.asarray(interpolated_values).reshape(nlats,nlons,len(plevs)) + interpolated_field = np.moveaxis(interpolated_field, -1, 0) + return interpolated_field + +def check_data_cube(dc): + checks = True + for var_name in list(dc.keys()): + if var_name in ['z', 't', 'q', 'lnsp']: + lnsp_dims = ['time', 'level', 'latitude','longitude'] + if all(value in lnsp_dims for value in dc[var_name].dims): + continue + else: + print("Not all required lnsp dimensions found -exiting ", + dc[var_name].dims) + checks = False + return checks + +def ECMWFml2pl(input_fname, output_fname, plevels): + ''' + Convert the ECMWF model level netcdf to pressure level netcdf + + Args: + * input_fname : input ECMWF file at model level in netcdf format (str) + * output_fname : output pressure level in netcdf format (str) + * plevels : pressure levels in Pa (list) + ''' + + check_requested_levels(plevels) + + if not os.path.isfile(input_fname): + print("Input file does not exist - exiting") + sys.exit() + + variable_list = get_input_variable_list(input_fname) + + # Create a data object to hold the input and output data + data_cube = xr.open_dataset(input_fname) + + if not check_data_cube(data_cube): + sys.exit() + # Get the ln surface pressure + data_cube['pml']=data_cube['lnsp'].copy() + data_cube['gph'] = xr.DataArray( + data=np.zeros(data_cube['pml'].shape), dims=data_cube['pml'].dims, + coords=data_cube['pml'].coords) + + for time_step in range(len(data_cube['t'].time)): + data_slice_lnsp=data_cube['lnsp'][time_step,0,:,:] + z_h=data_cube['z'][time_step,0,:,:] + sp = np.exp(data_slice_lnsp) + # Get the pressure field on model levels for each timestep + data_cube['pml'][time_step,:,:,:] = calculate_pressure_on_model_levels(sp) + for lev in range(len(data_cube['t'].level), 0, -1): + t_level = data_cube['t'][time_step,lev - 1,:,:] + q_level = data_cube['q'][time_step,lev - 1,:,:] + z_h, data_cube['gph'][time_step, lev - 1,:,:] = \ + compute_z_level(lev, sp, t_level, q_level, z_h) + + data_cube['pml'].attrs = {'units' : 'Pa','long_name':'pressure', + 'standard_name':'air_pressure','positive':'down'} + data_cube['gph'].attrs = {'units' : 'm**2 s**-2','long_name':'Geopotential', + 'standard_name':'geopotential'} + + all_interpolated_var_fields_list = [] + for var in variable_list + ['gph']: + if var in ['lnsp', 'pml', 'z']: + continue + interpolated_var_field = data_cube[var].copy() + interpolated_var_field = interpolated_var_field[:,0:len(plevels),:,:] + interpolated_var_field = interpolated_var_field.rename({'level':'pressure'}) + interpolated_var_field['pressure'] = plevels + for time_step in range(len(data_cube[var].time)): + var_on_ml = data_cube[var][time_step,:,:,:] + p_on_ml = data_cube['pml'][time_step,:,:,:] + interpolated_var_field[time_step,:,:,:] = \ + calculate_interpolated_pressure_field(var_on_ml,p_on_ml,plevels) + all_interpolated_var_fields_list.append(interpolated_var_field) + all_interpolated_var_fields = xr.merge(all_interpolated_var_fields_list) + all_interpolated_var_fields = all_interpolated_var_fields.rename({'gph':'z'}) + all_interpolated_var_fields['pressure'].attrs = {'units' : 'Pa','long_name':'pressure', + 'standard_name':'air_pressure','positive':'down'} + all_interpolated_var_fields.to_netcdf(output_fname,format='NETCDF4', engine='netcdf4', + encoding={'z': {'zlib': True, 'complevel': 4}}) + all_interpolated_var_fields.close()