5. Surface forcing

1. Extract ERA5 data

The model is forced by ERA5 data.

The ERA5 data for the EAfrica domain can be extracted on the livljobs server using modified scripts from the DEV_nibrun repository.

On livljobs7, change to a directory with plenty of space and copy over the main ERA5 extraction script from GitHub::

https://github.com/NOC-MSM/DEV_nibrun/blob/master/FORCING_DATA/OFFICIAL_Generate_NEMO_Forcing_NEWERA.py

In OFFICIAL_Generate_NEMO_Forcing_NEWERA.py, set the date range of the data and the latitude and longitude limits of the region you want to extract. IMPORTANT: make sure the extracted region is larger than your model domain.

You also need to update path_EXTRACT and path_FORCING, but path_ERA5 should remain the same (if running on a livljobs server).
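
A quick way to confirm the extraction box encloses the model domain is to compare the borders against your domain coordinates file. A minimal sketch, assuming a NEMO coordinates.nc holding glamt/gphit (some files use nav_lon/nav_lat instead)::

# check_extraction_box.py : minimal sketch, not part of the official scripts
from netCDF4 import Dataset
import numpy as np

West, East, South, North = 38., 44., -12., -1.   # borders from the USR PARAMS block

nc  = Dataset( 'coordinates.nc', 'r' )
lon = np.squeeze( nc.variables['glamt'][:] )     # use 'nav_lon'/'nav_lat' if needed
lat = np.squeeze( nc.variables['gphit'][:] )
nc.close()

print( 'domain lon : {0:.2f} to {1:.2f}'.format( lon.min(), lon.max() ) )
print( 'domain lat : {0:.2f} to {1:.2f}'.format( lat.min(), lat.max() ) )
assert West  < lon.min() and lon.max() < East , 'extraction box too narrow in longitude'
assert South < lat.min() and lat.max() < North, 'extraction box too short in latitude'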

I did the EAfrica ERA5 extraction on livljobs7, here: /work/valegu/Generate_Forcings_EA_R12/

vi OFFICIAL_Generate_NEMO_Forcing_NEWERA.py

#
#====================== DOCSTRING ============================
"""
Generate ERA5 atmospheric forcing for NEMO
So far produces one year at a time - needs to be automated
--------------------------------------------------------------
"""
__author__      = "Nicolas Bruneau"
__copyright__   = "Copyright 2018, NOC"
__email__       = "[email protected]"
__date__        = "2018-05"

#====================== USR PARAMS ===========================

Year_init    = 2015                                             ## First year to process          
Year_end     = 2015                                             ## Last one [included]
East         =   44                                             ## East Border
West         =   38                                             ## West Border
North        =   -1                                             ## North Border
South        =   -12                                            ## South Border
path_ERA5    = '/projectsa/NEMO/Forcing/ERA5/SURFACE_FORCING/'  ## ROOT PATH OF ERA5 DATA
path_EXTRACT = './EXTRACTION/'                                  ## WHERE TO EXTRACT YOUR REGION
path_FORCING = './FORCING/'                                     ## NEMO FORCING
clean        = False                                            ## Clean extraction (longest bit)
sph_ON       = True                                             ## Compute specific humidity or not

#================== NEMO DOCUMENTATION =======================

"""
See the manual in section SBC for more details on the way data
are used in NEMO
The time variable from the netcdf is not used
"""

#====================== LOAD MODULES =========================

import os, sys, glob
import subprocess
import numpy as np
import datetime
from   netCDF4 import Dataset, MFDataset
import netcdftime
import matplotlib.pyplot as plt
from   matplotlib.mlab import griddata
import scipy.spatial.qhull as qhull

#====================== VARIABLE DEF =========================

var_path = { "10m_u_component_of_wind" : "u10", \
             "10m_v_component_of_wind" : "v10", \
             "2m_temperature"          : "t2m", \
             "mean_sea_level_pressure" : "msl", \
             "mean_snowfall_rate"      : "msr" , \
             "mean_surface_downward_long_wave_radiation_flux"  : "msdwlwrf", \
             "mean_surface_downward_short_wave_radiation_flux" : "msdwswrf", \
             "mean_total_precipitation_rate" : "mtpr" }

if sph_ON :
   var_path[ "surface_pressure"  ] = 'sp'
   var_path[ "2m_dewpoint_temperature" ] = 'd2m'

#===================== INTERNAL FCTNS ========================

help_tosec = np.vectorize( lambda x : x.total_seconds() )

def Read_NetCDF_Concatenate( fname, KeyVar ) :
    'Read and time-concatenate a set of NetCDF files'
    lfiles = sorted( glob.glob( fname ) )
    for iF, myfile in enumerate(lfiles) :
        nc = Dataset( myfile, 'r' )
        ## Get time using the time variable
        Time_Var = nc.variables[ 'time']
        dt = Time_Var[:][1] - Time_Var[:][0]
        Time_H   = np.arange( Time_Var[:][0], Time_Var[:][0]+dt*Time_Var[:].size, dt )
        try :    Tref = netcdftime.utime( Time_Var.units, calendar = Time_Var.calendar )
        except : Tref = netcdftime.utime( Time_Var.units, calendar = "gregorian" )
        Time = Tref.num2date( Time_H )
        ## Get Coordinates
        if iF == 0:
           try :
             Lon = nc.variables[ 'longitude' ][:]
             Lat = nc.variables[ 'latitude' ][:]
             LON, LAT = np.meshgrid( Lon, Lat )
           except :
             LON = nc.variables[ 'lon' ][:]
             LAT = nc.variables[ 'lat' ][:]
        ## Get Variable
        dum = nc.variables[ KeyVar ]
        Var = dum[:]; ind = ( Var == dum._FillValue ); Var[ind] = np.nan
        ## save
        if iF == 0 : out = Var; tout = Time
        else       : out = np.concatenate( [out,Var], axis=0 ); tout = np.concatenate( [tout,Time], axis=0 )
    print tout[0], tout[-1], tout.shape, out.shape, LON.shape
    try    : return tout, LON, LAT, out, dum.units, dum.long_name
    except : return tout, LON, LAT, out, dum.units, dum.standard_name


def Read_NetCDF( fname, KeyVar ) : 
    'Read NetCDF file'
    if "*" in fname : nc = MFDataset( fname, 'r' )
    else            : nc =   Dataset( fname, 'r' )
    ## Get time using the time variable
    Time_Var = nc.variables[ 'time']
    dt = Time_Var[:][1] - Time_Var[:][0]
    Time_H   = np.arange( Time_Var[:][0], Time_Var[:][0]+dt*Time_Var[:].size, dt ) 
    try :    Tref = netcdftime.utime( Time_Var.units, calendar = Time_Var.calendar )
    except : Tref = netcdftime.utime( Time_Var.units, calendar = "gregorian" )
    Time = Tref.num2date( Time_H )
    print "====================++++"
    ## Get Coordinates
    try :
       Lon = nc.variables[ 'longitude' ][:]
       Lat = nc.variables[ 'latitude' ][:]
       LON, LAT = np.meshgrid( Lon, Lat )
    except :
       LON = nc.variables[ 'lon' ][:]
       LAT = nc.variables[ 'lat' ][:]

    ## Get Variable
    dum = nc.variables[ KeyVar ]
    Var = dum[:]
    ind = ( Var == dum._FillValue )
    Var[ind] = np.nan 
    #Var = Var / dum.scale_factor + dum.add_offset
    ind = (np.isnan(Var))
    Var[ind] = -9999999

    print Time[0], Time[-1], Var.shape, Time.shape, np.sum(ind)
    try    : return Time, LON, LAT, Var, dum.units, dum.long_name
    except : return Time, LON, LAT, Var, dum.units, dum.standard_name

#=================== MANIPULATE NetCDF =======================

def compute_scale_and_offset( Var, n ):
    'http://james.hiebert.name/blog/work/2015/04/18/NetCDF-Scale-Factors/'
    Vmin = np.nanmin( Var )
    Vmax = np.nanmax( Var )
    print "scaleoffset", Vmin, Vmax
    # stretch/compress data to the available packed range
    scale_factor = (Vmax - Vmin) / (2 ** n - 1)
    # translate the range to be symmetric about zero
    add_offset = Vmin + 2 ** (n - 1) * scale_factor
    return scale_factor, add_offset

def Add_Variable( nc, vName, vDim, vVal, long_name=None, units=None, standard_name=None, fill_value=None) :
    "Add a variable with its attributes in a netcdf file"
    if vName not in ['time','lon','lat',] : fprec = 'i'
    else : fprec = 'f8'

    if fill_value != None : nc.createVariable( vName, fprec, vDim, fill_value=fill_value, zlib=True, complevel=5 )
    else                  : nc.createVariable( vName, fprec, vDim, zlib=True, complevel=5 )
    if long_name     != None : nc.variables[ vName ].long_name     = long_name
    if units         != None : nc.variables[ vName ].units         = units
    if standard_name != None : nc.variables[ vName ].standard_name = standard_name
    if vName not in ['time','lon','lat',] :
       sc, off = compute_scale_and_offset( vVal, 16 )
       nc.variables[ vName ].scale_factor = sc
       nc.variables[ vName ].add_offset   = off

    nc.variables[ vName ][:] = vVal   # np.floor((vVal-off)/sc)

def Create_Dimensions( nc, lon_name, nLon, lat_name, nLat ) :
    "Create NetCDF dimensions time, nx, ny"
    nc.createDimension( lon_name , nLon )
    nc.createDimension( lat_name , nLat )
    nc.createDimension( 'time'   , None )

def Create_NetCDF_core( nc, tDim, tRef, tVal, sDim, sVal_lon, sVal_lat ) :
    "Create Lon, Lat and Time variables"
    # WRITE TIME INFO
    tUnit = "days since {0} UTC".format( tRef.strftime( "%Y-%m-%d %H:%M:%S" ) ); tCal  = "standard"
    Tref  = netcdftime.utime( tUnit, calendar = tCal )
    Add_Variable( nc, 'time', ('time'), Tref.date2num( tVal ), \
                  long_name = "time since {0}".format(tUnit) , \
                  units = tUnit )
    nc.variables['time'].calendar  = tCal
    #nc.variables['time'].base_date = np.array( [ tRef.year, tRef.month, tRef.day, tRef.hour ] )

    # WRITE LON INFO
    Add_Variable( nc, 'lon', sDim, sVal_lon, long_name = 'Longitude', \
                  units = 'degree_east', standard_name = 'longitude'  )

    # WRITE LAT INFO
    Add_Variable( nc, 'lat', sDim, sVal_lat, long_name = 'Latitude', \
                  units = 'degree_north', standard_name = 'latitude' )

def Create_Attributes( nc ) :
    "Add some info - I do it at the end as I had issue with not properly readable netcdf if not"
    nc.Description = 'ERA5 Atmospheric conditions for AMM15 NEMO3.6'
    nc.Author      = 'Prepare_ERA5.py'
    nc.Created     = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    nc.Conventions = "CF-1.0"
    nc.close()

#======================= EXTRACTION ==========================

def Extract( fin, fout, clean=True ) :
    if clean : os.system( "rm {0}".format( fout ) )
    if not os.path.exists( fout ) :
       command = "ncks -d latitude,{0},{1} -d longitude,{2},{3} {4} {5}".format( np.float(South), np.float(North),\
                                                                                 np.float(West), np.float(East), fin, fout )
       print command
       os.system( command )
      
def datetime_range(start, end, delta):
    current = [start, ]
    while current[-1] < end:
        current.append( current[-1]+delta )
    return np.array(current)

#======================= CORE PROGR ==========================

## load NCO
os.system( "module load nco/gcc/4.4.2.ncwa" )
os.system( "mkdir {0} {1}".format( path_EXTRACT, path_FORCING ) )
if West < 0 : West = 360.+West
if East < 0 : East = 360.+East

## Loop over each variable
for dirVar, nameVar in var_path.iteritems() :

    print "================== {0} - {1} ==================".format( dirVar, nameVar )

    ##---------- EXTRACT ALL DATA FOR DOMAIN ----------------
    for iY in range( Year_init, Year_end+1 ) :
        ## Files
        finput  = "{0}/{1}/{2}_{1}.nc".format( path_ERA5, dirVar, iY )
        foutput = "./{2}/{0}_{1}.nc".format( nameVar, iY, path_EXTRACT )
        ## Extract the subdomain
        Extract( finput, foutput, clean=clean ) 

    ##---------- LOAD FULL TIME SERIES IN MEMORY -----------
    Time, Lon, Lat, dum, Units, Name = Read_NetCDF_Concatenate( "./{1}/{0}_*.nc".format( nameVar, path_EXTRACT ), nameVar )
    print "Time" , Time

    dt  = Time[1] - Time[0]   ## assume to be constant in time
    dt2 = datetime.timedelta( seconds=dt.total_seconds() / 2. )
    print "dt", dt, dt2

    ##---------- SOME PREPROCESSING -------------------------
    ## Add time step for last hour - copy the last input
    dumA  = np.concatenate( [  dum,  dum[-1][np.newaxis,...] ], axis = 0 )
    TimeA = np.array( Time.tolist() + [Time[-1],]  )

    print "Time" , Time
    print "TimeA", TimeA
    ## instantaneous field every hour. we center it in mid-time step (00:30) as it
    ## is what NEMO assumes according to documentation
    dumC  = ( dumA[0:-1] + dumA[1::] ) / 2.0   
    TimeC =  TimeA[0:-1] + dt2           ## shift half time step positively due to averaging
    suffix = ''

    print "TimeC", TimeC

    ##---------- OUTPUT A FILE PER YEAR ---------------------
    for iY in range( Year_init, Year_end+1 ) :

        print datetime.datetime( iY  ,1,1 ), datetime.datetime( iY+1,1,1 )
        indT = ( np.array(TimeC) >= datetime.datetime( iY  ,1,1,0,0,0 ) ) \
             * ( np.array(TimeC) <  datetime.datetime( iY+1,1,1,0,0,0 ) )
        print "indT",np.sum(indT)
 
        if nameVar in [ "d2m", "sp" ] :
               Fout = "./{2}/forSPH_ERA5_{0}_y{1}.nc".format( nameVar.upper(), iY, path_FORCING )
        else : Fout = "./{2}/ERA5_{0}_y{1}.nc".format( nameVar.upper(), iY, path_FORCING )
        nc = Dataset( Fout, 'w', format='NETCDF4_CLASSIC')
        Create_Dimensions ( nc, 'nLon', Lon.shape[1], 'nLat' , Lat.shape[0] )
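        ## latitude axis is flipped (::-1) so it increases with index (ERA5 stores it north-to-south)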
        Create_NetCDF_core( nc, ('time'), TimeC[indT][0], TimeC[indT], ('nLat', 'nLon'), Lon[::-1,:], Lat[::-1,:] )
        Add_Variable( nc, nameVar.upper(), ( 'time', 'nLat', 'nLon'), dumC[indT,::-1,:], units=Units+suffix, standard_name=Name, fill_value=-999999 )
        Create_Attributes( nc )


##---------- PROCESS SPECIFIC HUMIDITY ----------------------     
## Compute Specific Humidity according to ECMWF documentation

if sph_ON : 

   for iY in range( Year_init, Year_end+1 ) :
       Time, Lon, Lat, d2m, dUnits, dName = Read_NetCDF( "./{1}/forSPH_ERA5_D2M_y{0}.nc".format( iY, path_FORCING ), 'D2M' )
       Time, Lon, Lat, sp , dUnits, dName = Read_NetCDF( "./{1}/forSPH_ERA5_SP_y{0}.nc" .format( iY, path_FORCING ), 'SP'  )
       esat = 611.21 * np.exp( 17.502 * (d2m-273.16) / (d2m-32.19) )
       dyrvap = 287.0597 / 461.5250
       dVar = dyrvap * esat / ( sp - (1-dyrvap) * esat)
       Units = "1"; Name = "Specific Humidity"

       indT = ( Time >= datetime.datetime( iY  ,1,1 ) ) \
            * ( Time <  datetime.datetime( iY+1,1,1 ) )

       Fout = "./{1}/ERA5_SPH_y{0}.nc".format( iY, path_FORCING )
       nc = Dataset( Fout, 'w', format='NETCDF4_CLASSIC')
       Create_Dimensions ( nc, 'nLon', Lon.shape[1], 'nLat' , Lat.shape[0] )
       Create_NetCDF_core( nc, ('time'), Time[indT][0], Time[indT], ('nLat', 'nLon'), Lon[:,:], Lat[:,:] )
       Add_Variable( nc, "SPH", ( 'time', 'nLat', 'nLon'), dVar[indT,:,:], units=Units, standard_name=Name, fill_value=-999999 )
       Create_Attributes( nc )
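
For reference, the specific humidity computed above follows the standard ECMWF formulation: saturation vapour pressure over water from the 2 m dew-point temperature, then specific humidity from surface pressure. In the notation of the code (d2m in K, sp in Pa):

$$ e_{sat}(d2m) = 611.21 \, \exp\!\left( \frac{17.502\,(d2m - 273.16)}{d2m - 32.19} \right) \;[\mathrm{Pa}] $$

$$ \mathrm{SPH} = \frac{\epsilon\, e_{sat}}{sp - (1 - \epsilon)\, e_{sat}}, \qquad \epsilon = \frac{R_{dry}}{R_{vap}} = \frac{287.0597}{461.5250} \approx 0.622 $$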

Activate a Python environment that has the libraries the script imports (numpy, netCDF4, netcdftime, matplotlib, scipy); in my case this was nrct_env::

module load anaconda
source activate nrct_env

Load modules::

module load nco/gcc/4.4.2

Run the script::

python OFFICIAL_Generate_NEMO_Forcing_NEWERA.py

Depending on the size of the region and the date range requested, this will take a while.
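
Once the run completes, it is worth opening one of the generated forcing files to check that the time axis and value range look sensible. A minimal sketch (the year and variable are just examples - use whatever you produced)::

# quick sanity check of a generated forcing file
from netCDF4 import Dataset

nc   = Dataset( './FORCING/ERA5_T2M_y2015.nc', 'r' )
time = nc.variables['time']
t2m  = nc.variables['T2M'][:]    # scale_factor/add_offset are applied automatically
print( 'records : {0} (expect ~8760 hourly values for one year)'.format( time[:].size ) )
print( 'T2M range : {0:.1f} to {1:.1f} {2}'.format( t2m.min(), t2m.max(), nc.variables['T2M'].units ) )
nc.close()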

2. Create WEIGHTS for the atmospheric forcing

Once the ERA5 data have been downloaded, generate weights for the atmospheric forcing using the script create_WA.sh. Instructions for this can be found on the SEAsia wiki.

cd $SBC

#The ERA5 fields have been downloaded on livljobs
ln -s $DOMAIN/coordinates.nc $SBC/.
# Ensure the correct modules are loaded for ARCHER2
# Load modules listed in /work/n01/shared/nemo/setup
# Tested 14Jan22
module swap craype-network-ofi craype-network-ucx
module swap cray-mpich cray-mpich-ucx
module load cray-hdf5-parallel/1.12.0.7
module load cray-netcdf-hdf5parallel/4.7.4.7
#obtain namelists
cp $GITCLONE/ATMOS_FORCING/namelist_reshape_bicubic_atmos $SBC/.
cp $GITCLONE/ATMOS_FORCING/namelist_reshape_bilin_atmos $SBC/.
#generate weights
$TDIR/WEIGHTS/scripgrid.exe namelist_reshape_bilin_atmos
$TDIR/WEIGHTS/scrip.exe namelist_reshape_bilin_atmos
$TDIR/WEIGHTS/scripshape.exe namelist_reshape_bilin_atmos
$TDIR/WEIGHTS/scrip.exe namelist_reshape_bicubic_atmos
$TDIR/WEIGHTS/scripshape.exe namelist_reshape_bicubic_atmos
cd $WORK
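
If the weights generation succeeded, you should now have bilinear and bicubic weights files in $SBC (the output names are set in the namelist_reshape_* namelists). NEMO-style weights files contain paired src/wgt variables, which you can list as a quick check. A minimal sketch, with weights_bicubic_atmos.nc as an assumed output name::

# quick check of a generated weights file (the file name is an assumption -
# use the output name set in your namelist_reshape_* namelist)
from netCDF4 import Dataset

nc = Dataset( 'weights_bicubic_atmos.nc', 'r' )
wvars = sorted( v for v in nc.variables if v.startswith(('src','wgt')) )
print( 'weight variables : {0}'.format( wvars ) )   # expect src01/wgt01 ... pairs
for v in wvars :
    print( '{0} {1}'.format( v, nc.variables[v].shape ) )
nc.close()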

My data are here:

/work/n01/n01/valegu/EA_R12_BIO/nemo_vr4.06/cfgs/ORCA2_MEDUSA/EXP_FINAL/ERA5