# dea_climate.py'''Retrieving and manipulating gridded climate data.Adapted from scripts by Andrew Cherry and Brian Killough.License: The code in this notebook is licensed under the Apache License,Version 2.0 (https://www.apache.org/licenses/LICENSE-2.0). Digital Earth Australia data is licensed under the Creative Commons by Attribution 4.0 license (https://creativecommons.org/licenses/by/4.0/).Contact: If you need assistance, please post a question on the Open Data Cube Discord chat (https://discord.com/invite/4hhBQVas5U) or on the GIS Stack Exchange (https://gis.stackexchange.com/questions/ask?tags=open-data-cube) using the `open-data-cube` tag (you can view previously asked questions here: https://gis.stackexchange.com/questions/tagged/open-data-cube). If you would like to report an issue with this script, you can file one on GitHub https://github.com/digitalearthafrica/deafrica-sandbox-notebooks/issuesLast modified: October 2020'''importosimportdatetimeimportnumpyasnpfromdateutil.parserimportparseimportboto3importbotocoreimportxarrayasxrimportwarningsERA5_VARS=["air_pressure_at_mean_sea_level","air_temperature_at_2_metres","air_temperature_at_2_metres_1hour_Maximum","air_temperature_at_2_metres_1hour_Minimum","dew_point_temperature_at_2_metres","eastward_wind_at_100_metres","eastward_wind_at_10_metres","integral_wrt_time_of_surface_direct_downwelling_shortwave_flux_in_air_1hour_Accumulation","lwe_thickness_of_surface_snow_amount","northward_wind_at_100_metres","northward_wind_at_10_metres","precipitation_amount_1hour_Accumulation","sea_surface_temperature","sea_surface_wave_from_direction","sea_surface_wave_mean_period","significant_height_of_wind_and_swell_waves","snow_density","surface_air_pressure",]
[docs]defget_era5_daily(var,date_from_arg,date_to_arg=None,reduce_func=None,cache_dir='era5',resample='1D'):""" Download and return an variable from the European Centre for Medium Range Weather Forecasts (ECMWF) global climate reanalysis product (ERA5) for a defined time window. Parameters ---------- var : string Name of the ERA5 climate variable to download, e.g "air_temperature_at_2_metres" date_from_arg: string or datetime object Starting date of the time window. date_to_arg: string or datetime object End date of the time window. If not supplied, set to be the same as starting date. reduce_func: numpy function lets you specify a function to apply to each day's worth of data. The default is np.mean, which computes daily average. To get a sum, use np.sum. cache_dir: sting Path to save downloaded ERA5 data. The path will be created if not already exists. The default is 'era5'. resample: string Temporal resampling frequency to be used for xarray's resample function. The default is '1D', which is daily. Since ERA5 data is provided as one file per month, maximum resampling period is '1M'. Returns ------- A lazy-loaded xarray dataset containing an ERA5 variable for the selected time window. """# Massage input dataassertvarinERA5_VARS,"var must be one of [{}] (got {})".format(','.join(ERA5_VARS),var)ifnotos.path.exists(cache_dir):os.mkdir(cache_dir)ifreduce_funcisNone:reduce_func=np.meaniftype(date_from_arg)==str:date_from_arg=parse(date_from_arg)iftype(date_to_arg)==str:date_to_arg=parse(date_to_arg)ifdate_to_argisNone:date_to_arg=date_from_arg# Make sure our dates are in the correct orderfrom_date=min(date_from_arg,date_to_arg)to_date=max(date_from_arg,date_to_arg)# Download ERA5 files to local cache if they don't already existclient=None# Boto client (if needed)local_files=[]# Will hold list of local filenamesY,M=from_date.year,from_date.month# Loop varsloop_end=to_date.year*12+to_date.month# Loop sentinelwhileY*12+M<=loop_end:local_file=os.path.join(cache_dir,"{Y:04}_{M:02}_{var}.nc".format(Y=Y,M=M,var=var))data_key="{Y:04}/{M:02}/data/{var}.nc".format(Y=Y,M=M,var=var)ifnotos.path.isfile(local_file):# check if file already exists (TODO: move to temp, catch failed download)ifclientisNone:client=boto3.client('s3',config=botocore.client.Config(signature_version=botocore.UNSIGNED))client.download_file('era5-pds',data_key,local_file)local_files.append(local_file)ifM==12:Y+=1M=1else:M+=1# Load and merge the locally-cached ERA5 data from the list of filenamesdate_slice=slice(str(from_date.date()),str(to_date.date()))# I do this to INCLUDE the whole end date, not just 00:00defprepro(ds):if'time0'inds.dims:ds=ds.rename({"time0":"time"})if'time1'inds.dims:ds=ds.rename({"time1":"time"})# This should INTENTIONALLY error if both times are definedds=ds[[var]]output=ds.sel(time=date_slice).resample(time=resample).reduce(reduce_func)output.attrs=ds.attrsforvinoutput.data_vars:output[v].attrs=ds[v].attrsreturnoutputreturnxr.open_mfdataset(local_files,combine='by_coords',compat='equals',preprocess=prepro,parallel=True)
[docs]defera5_area_crop(ds,lat,lon):""" Crop a dataset containing European Centre for Medium Range Weather Forecasts (ECMWF) global climate reanalysis product (ERA5) variables to a location. The output spatial grid will either include input grid points within lat/lon boundaries or the nearest point if none is within the search location. Parameters ---------- ds : xarray dataset A dataset containing ERA5 variables of interest. lat: tuple or list Latitude range for query. lon: tuple or list Longitude range for query. Returns ------- An xarray dataset containing ERA5 variables for the selected location. """# Handle single value lat/lon args by wrapping them in liststry:min(lat)exceptTypeError:lat=[lat]try:min(lon)exceptTypeError:lon=[lon]ifmin(lon)<0:# re-order along longitude to go from -180 to 180ds=ds.assign_coords({"lon":(((ds.lon+180)%360)-180)})ds=ds.reindex({"lon":np.sort(ds.lon)})# Issue warnings if args outside range.ifmin(lat)<ds.lat.min()ormax(lat)>ds.lat.max():warnings.warn("Lats must be in range {} .. {}. Got: {}".format(ds.lat.min().values,ds.lat.max().values,lat))ifmin(lon)<ds.lon.min()ormax(lon)>ds.lon.max():warnings.warn("Lons must be in range {} .. {}. Got: {}".format(ds.lon.min().values,ds.lon.max().values,lon))# Find existing coords between min&maxlats=ds.lat[np.logical_and(ds.lat>=min(lat),ds.lat<=max(lat))].values# If there was nothing between, just plan to grab closestiflen(lats)==0:lats=np.unique(ds.lat.sel(lat=np.array(lat),method="nearest"))lons=ds.lon[np.logical_and(ds.lon>=min(lon),ds.lon<=max(lon))].valuesiflen(lons)==0:lons=np.unique(ds.lon.sel(lon=np.array(lon),method="nearest"))# crop and keep attrsoutput=ds.sel(lat=lats,lon=lons)output.attrs=ds.attrsforvarinoutput.data_vars:output[var].attrs=ds[var].attrsreturnoutput
[docs]defera5_area_nearest(ds,lat,lon):""" Crop a dataset containing European Centre for Medium Range Weather Forecasts (ECMWF) global climate reanalysis product (ERA5) variables to a location. The output spatial grid is snapped to the nearest input grid points. Parameters ---------- ds : xarray dataset A dataset containing ERA5 variables of interest. lat: tuple or list Latitude range for query. lon: tuple or list Longitude range for query. Returns ------- An xarray dataset containing ERA5 variables for the selected location. """ifmin(lon)<0:# re-order along longitude to go from -180 to 180ds=ds.assign_coords({"lon":(((ds.lon+180)%360)-180)})ds=ds.reindex({"lon":np.sort(ds.lon)})# find the nearest lat lon boundary pointstest=ds.sel(lat=list(lat),lon=list(lon),method='nearest')# define the lat/lon gridlat_range=slice(test.lat.max().values,test.lat.min().values)lon_range=slice(test.lon.min().values,test.lon.max().values)# crop and keep attrsoutput=ds.sel(lat=lat_range,lon=lon_range)output.attrs=ds.attrsforvarinoutput.data_vars:output[var].attrs=ds[var].attrsreturnoutput
[docs]defload_era5(var,lat,lon,time,grid='nearest',**kwargs):""" Returns a European Centre for Medium Range Weather Forecasts (ECMWF) global climate reanalysis product (ERA5) variable for a selected location and time window. Parameters ---------- var : string Name of the ERA5 climate variable to download, e.g "air_temperature_at_2_metres" lat: tuple or list Latitude range for query. lon: tuple or list Longitude range for query. time: tuple or list Time range for query. grid: string Option for output spatial gridding. The default is 'nearest', for which output spatial grid is snapped to the nearest ERA5 input grid points. Alternatively, output spatial grid will either include input grid points within lat/lon boundaries or the nearest point if none is within the search location. Returns ------- An xarray dataset containing the variable for the selected location and time window. """ds=get_era5_daily(var,time[0],time[1],**kwargs)ifgrid=='nearest':returnera5_area_nearest(ds,lat,lon).compute()else:returnera5_area_crop(ds,lat,lon).compute()