From the PO.DAAC Cookbook; to access the GitHub version of this notebook, follow this link.
The spatial correlation between sea surface temperature anomaly and sea surface height anomaly in the Indian Ocean – A demo using ECCO
Date: 2022-02-15
Objective
This tutorial uses data from the Estimating the Circulation and Climate of the Ocean (ECCO) model to derive spatial correlations through time for two regions of the Indian Ocean. The goal is to investigate the correlative characteristics of the Indian Ocean Dipole and how its east and west regions behave differently. This investigation was motivated by Fig. 2a,b in the paper by Wang et al. (2016).
- Wang, H., Murtugudde, R. & Kumar, A. Evolution of Indian Ocean dipole and its forcing mechanisms in the absence of ENSO. Clim Dyn 47, 2481–2500 (2016). https://doi.org/10.1007/s00382-016-2977-y
Input datasets
- ECCO Ocean Temperature and Salinity - Monthly Mean 0.5 Degree (Version 4 Release 4). DOI: https://doi.org/10.5067/ECG5M-OTS44
- ECCO Sea Surface Height - Monthly Mean 0.5 Degree (Version 4 Release 4b). DOI: https://doi.org/10.5067/ECG5D-SSH4B
Services and software used
- NASA Harmony netcdf-to-Zarr
- xarray, requests, json, pandas, numpy, matplotlib, s3fs
- Various Python utilities, including the NASA harmony-py package
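If you need to build a Python environment first, the imports below map to the following PyPI packages (the package names are our assumption based on the import list; they are not stated in the original notebook):

# A minimal environment sketch (assumed PyPI package names):
#   pip install harmony-py xarray zarr s3fs requests pandas numpy matplotlib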
# Import various libraries
import matplotlib.pyplot as plt
import xarray as xr
import numpy as np
import pandas as pd
import requests
import json
import time
import s3fs
import re
from datetime import datetime
from harmony import Client, Collection, Environment, Request
Identify the ShortName of the dataset of interest.
The ShortName is a unique identifier for each NASA dataset.
= "ECCO_L4_SSH_05DEG_MONTHLY_V4R4B" ShortName
Set start and end dates
= "1992-01-01"
start_date = "2002-12-31"
end_date
# break it down into year, month, day (and minutes and seconds if desired)
# as inputs to the harmony-py call using datetime()
start_year = 2002
start_month = 1
start_day = 1

end_year = 2017
end_month = 12
end_day = 31
Spatial bounds (Region of Interest) – Not used
westernmost_longitude = 100.
easternmost_longitude = 150.
northernmost_latitude = 30.
southernmost_latitude = 0.
Find the concept_id
response = requests.get(
    url='https://cmr.earthdata.nasa.gov/search/collections.umm_json',
    params={'provider': "POCLOUD",
            'ShortName': ShortName,
            'page_size': 1}
)

ummc = response.json()['items'][0]
ccid = ummc['meta']['concept-id']
ccid
'C2129189405-POCLOUD'
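The lookup above assumes CMR returns at least one match. A slightly more defensive variant might look like the sketch below; the function name and error handling are our own, not part of the original notebook:

# Hedged variant of the concept-id lookup: returns None instead of raising
# IndexError when CMR has no match for the given ShortName.
def find_concept_id(short_name, provider="POCLOUD"):
    r = requests.get(
        url='https://cmr.earthdata.nasa.gov/search/collections.umm_json',
        params={'provider': provider, 'ShortName': short_name, 'page_size': 1},
    )
    items = r.json().get('items', [])
    return items[0]['meta']['concept-id'] if items else None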
NetCDF to Zarr transformation
Set up the harmony-py service call and execute a request
# using the harmony-py service, set up the request and execute it
ecco_collection = Collection(id=ccid)
time_range = {'start': datetime(start_year, start_month, start_day),
              'stop': datetime(end_year, end_month, end_day)}
print(time_range)

harmony_client = Client(env=Environment.PROD)

# in this example set concatenate to 'False' because the monthly input time steps vary slightly
# (not always centered in the middle of the month)
ecco_request = Request(collection=ecco_collection, temporal=time_range,
                       format='application/x-zarr', concatenate='False')

# submit the request and monitor the job
ecco_job_id = harmony_client.submit(ecco_request)
print('\nWaiting for the job to finish. . .\n')
ecco_response = harmony_client.result_json(ecco_job_id, show_progress=True)
print("\n. . .DONE!")
{'start': datetime.datetime(2002, 1, 1, 0, 0), 'stop': datetime.datetime(2017, 12, 31, 0, 0)}
Waiting for the job to finish. . .
[ Processing: 100% ] |###################################################| [|]
. . .DONE!
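If you prefer not to block on result_json, harmony-py also lets you poll the job yourself. A minimal sketch, assuming Client.status returns a dict with 'status' and 'progress' fields (check the documentation for your harmony-py version):

# Poll the job status instead of blocking (response field names assumed).
job_status = harmony_client.status(ecco_job_id)
print(job_status.get('status'), '-', job_status.get('progress'), '% complete')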
You can also wrap the creation of the Harmony request URL into one function. Shown here for legacy purposes (does not execute a Harmony request):
def get_harmony_url(ccid, start_date, end_date):
    """
    Parameters:
    ===========
    ccid: string
        concept_id of the dataset
    start_date, end_date: string
        "YYYY-MM-DD" start and end dates
    Return:
    =======
    url: the Harmony URL used to perform the netCDF-to-Zarr transformation
    """
    base = f"https://harmony.earthdata.nasa.gov/{ccid}"
    hreq = f"{base}/ogc-api-coverages/1.0.0/collections/all/coverage/rangeset"
    rurl = f"{hreq}?format=application/x-zarr"

    subs = '&'.join([f'subset=time("{start_date}T00:00:00.000Z":"{end_date}T23:59:59.999Z")'])
    #subs = subs + '&' + '&'.join([f'subset=lat({southernmost_latitude}:{northernmost_latitude})'])
    #subs = subs + '&' + '&'.join([f'subset=lon({westernmost_longitude}:{easternmost_longitude})'])
    rurl = f"{rurl}&{subs}"
    return rurl
ccid = 'C2129189405-POCLOUD'
print(get_harmony_url(ccid, start_date, end_date))

# this is the way you would execute it:
# response = requests.get(url=rurl).json()
https://harmony.earthdata.nasa.gov/C2129189405-POCLOUD/ogc-api-coverages/1.0.0/collections/all/coverage/rangeset?format=application/x-zarr&subset=time("1992-01-01T00:00:00.000Z":"2002-12-31T23:59:59.999Z")
Access the staged cloud datasets over native AWS interfaces
print(ecco_response['message'])

with requests.get(ecco_response['links'][2]['href']) as r:
    creds = r.json()

print(creds.keys())
print("AWS credentials expire on: ", creds['Expiration'])
The job has completed successfully. Contains results in AWS S3. Access from AWS us-west-2 with keys from https://harmony.earthdata.nasa.gov/cloud-access.sh
dict_keys(['AccessKeyId', 'SecretAccessKey', 'SessionToken', 'Expiration'])
AWS credentials expire on: 2022-05-03T08:45:27.000Z
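These temporary credentials expire after a short window. A small self-contained check of the time remaining is sketched below (our own addition; the timestamp format is taken from the Expiration string printed above):

from datetime import datetime, timezone

# Parse the ISO-style expiry stamp and report how long the keys remain valid.
expiry = datetime.strptime(creds['Expiration'], "%Y-%m-%dT%H:%M:%S.%fZ").replace(tzinfo=timezone.utc)
print("Credentials valid for another", expiry - datetime.now(timezone.utc))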
List zarr datasets with s3fs
s3_dir = ecco_response['links'][3]['href']
print("root directory:", s3_dir)

s3_urls = [u['href'] for u in ecco_response['links'][4:-1]]

# sort the URLs in time order
s3_urls.sort()
print(s3_urls[0])

s3 = s3fs.S3FileSystem(
    key=creds['AccessKeyId'],
    secret=creds['SecretAccessKey'],
    token=creds['SessionToken'],
    client_kwargs={'region_name': 'us-west-2'},
)

len(s3.ls(s3_dir))
root directory: s3://harmony-prod-staging/public/harmony/netcdf-to-zarr/0164d106-f8f0-431e-926c-c7d3f5f0575e/
s3://harmony-prod-staging/public/harmony/netcdf-to-zarr/0164d106-f8f0-431e-926c-c7d3f5f0575e/SEA_SURFACE_HEIGHT_mon_mean_2001-12_ECCO_V4r4b_latlon_0p50deg.zarr
193
Open datasets with xarray
ds0 = xr.open_zarr(s3.get_mapper(s3_urls[0]), decode_cf=True, mask_and_scale=True)
ds0
<xarray.Dataset> Dimensions: (time: 1, latitude: 360, longitude: 720, nv: 2) Coordinates: * latitude (latitude) float32 -89.75 -89.25 -88.75 ... 89.25 89.75 latitude_bnds (latitude, nv) float32 dask.array<chunksize=(360, 2), meta=np.ndarray> * longitude (longitude) float32 -179.8 -179.2 -178.8 ... 179.2 179.8 longitude_bnds (longitude, nv) float32 dask.array<chunksize=(720, 2), meta=np.ndarray> * time (time) datetime64[ns] 2001-12-16T12:00:00 time_bnds (time, nv) datetime64[ns] dask.array<chunksize=(1, 2), meta=np.ndarray> Dimensions without coordinates: nv Data variables: SSH (time, latitude, longitude) float32 dask.array<chunksize=(1, 360, 720), meta=np.ndarray> SSHIBC (time, latitude, longitude) float32 dask.array<chunksize=(1, 360, 720), meta=np.ndarray> SSHNOIBC (time, latitude, longitude) float32 dask.array<chunksize=(1, 360, 720), meta=np.ndarray> Attributes: (12/57) Conventions: CF-1.8, ACDD-1.3 acknowledgement: This research was carried out by the Jet Pr... author: Ou Wang and Ian Fenty cdm_data_type: Grid comment: Fields provided on a regular lat-lon grid. ... coordinates_comment: Note: the global 'coordinates' attribute de... ... ... time_coverage_duration: P1M time_coverage_end: 2002-01-01T00:00:00 time_coverage_resolution: P1M time_coverage_start: 2001-12-01T00:00:00 title: ECCO Sea Surface Height - Monthly Mean 0.5 ... uuid: cd67c8ad-0f9e-4cf7-a3c3-4ac4d6016ea5
Concatenate all granules (datasets) via a loop
ssh_ds = xr.concat([xr.open_zarr(s3.get_mapper(u)) for u in s3_urls], dim="time", coords='minimal')
#ssh_ds_group = xr.concat([xr.open_zarr(s3.get_mapper(u)) for u in s3_urls], dim="time", coords='minimal').groupby('time.month')
ssh_ds
<xarray.Dataset> Dimensions: (time: 193, latitude: 360, longitude: 720, nv: 2) Coordinates: * latitude (latitude) float32 -89.75 -89.25 -88.75 ... 89.25 89.75 latitude_bnds (latitude, nv) float32 -90.0 -89.5 -89.5 ... 89.5 89.5 90.0 * longitude (longitude) float32 -179.8 -179.2 -178.8 ... 179.2 179.8 longitude_bnds (longitude, nv) float32 -180.0 -179.5 -179.5 ... 179.5 180.0 * time (time) datetime64[ns] 2001-12-16T12:00:00 ... 2017-12-16T... time_bnds (time, nv) datetime64[ns] dask.array<chunksize=(1, 2), meta=np.ndarray> Dimensions without coordinates: nv Data variables: SSH (time, latitude, longitude) float32 dask.array<chunksize=(1, 360, 720), meta=np.ndarray> SSHIBC (time, latitude, longitude) float32 dask.array<chunksize=(1, 360, 720), meta=np.ndarray> SSHNOIBC (time, latitude, longitude) float32 dask.array<chunksize=(1, 360, 720), meta=np.ndarray> Attributes: (12/57) Conventions: CF-1.8, ACDD-1.3 acknowledgement: This research was carried out by the Jet Pr... author: Ou Wang and Ian Fenty cdm_data_type: Grid comment: Fields provided on a regular lat-lon grid. ... coordinates_comment: Note: the global 'coordinates' attribute de... ... ... time_coverage_duration: P1M time_coverage_end: 2002-01-01T00:00:00 time_coverage_resolution: P1M time_coverage_start: 2001-12-01T00:00:00 title: ECCO Sea Surface Height - Monthly Mean 0.5 ... uuid: cd67c8ad-0f9e-4cf7-a3c3-4ac4d6016ea5
Rinse and repeat all steps for the second dataset (ocean temperature and salinity)
Run 2nd Harmony netCDF-to-Zarr call
= "ECCO_L4_TEMP_SALINITY_05DEG_MONTHLY_V4R4"
ShortName
# 1) Find new concept_id for this dataset
= requests.get(
response ='https://cmr.earthdata.nasa.gov/search/collections.umm_json',
url={'provider': "POCLOUD",
params'ShortName': ShortName,
'page_size': 1}
)
= response.json()['items'][0]
ummc = ummc['meta']['concept-id']
ccid
# using the the harmony.py service, set up the request and exectue it
= Collection(id=ccid)
ecco_collection = {'start': datetime(start_year, start_month, start_day), 'stop': datetime(end_year, end_month, end_day)}
time_range
= Client(env=Environment.PROD)
harmony_client
# in this example set concatentae to 'False' because the monthly input time steps vary slightly
# (not always centered in the middle of month)
= Request(collection=ecco_collection, temporal=time_range, format='application/x-zarr', concatenate='False')
ecco_request
# sumbit request and monitor job
= harmony_client.submit(ecco_request)
ecco_job_id print('\nWaiting for the job to finish. . .\n')
= harmony_client.result_json(ecco_job_id, show_progress=True)
ecco_response print("\n. . .DONE!")
Waiting for the job to finish. . .
[ Processing: 100% ] |###################################################| [|]
. . .DONE!
Read the S3 Zarr endpoints and aggregate to a single Zarr
# 1) read the AWS credentials
print(ecco_response['message'])

with requests.get(ecco_response['links'][2]['href']) as r:
    creds = r.json()

print(creds.keys())
print("AWS credentials expire on: ", creds['Expiration'])

# 2) print the root directory and read the s3 URLs into a list
s3_dir2 = ecco_response['links'][3]['href']
print("root directory:", s3_dir2)

s3_urls2 = [u['href'] for u in ecco_response['links'][4:-1]]

# sort the URLs in time order
s3_urls2.sort()

# 3) Authenticate the AWS S3 credentials
s3 = s3fs.S3FileSystem(
    key=creds['AccessKeyId'],
    secret=creds['SecretAccessKey'],
    token=creds['SessionToken'],
    client_kwargs={'region_name': 'us-west-2'},
)

# 4) Read and concatenate into a single Zarr dataset
temp_ds = xr.concat([xr.open_zarr(s3.get_mapper(u)) for u in s3_urls2], dim="time", coords='minimal')
#temp_ds_group = xr.concat([xr.open_zarr(s3.get_mapper(u)) for u in s3_urls2], dim="time", coords='minimal').groupby('time.month')
temp_ds
The job has completed successfully. Contains results in AWS S3. Access from AWS us-west-2 with keys from https://harmony.earthdata.nasa.gov/cloud-access.sh
dict_keys(['AccessKeyId', 'SecretAccessKey', 'SessionToken', 'Expiration'])
AWS credentials expire on: 2022-05-03T08:48:44.000Z
root directory: s3://harmony-prod-staging/public/harmony/netcdf-to-zarr/9dd5c9b3-84c4-48e5-907d-2bd21d92a60c/
<xarray.Dataset> Dimensions: (time: 193, Z: 50, latitude: 360, longitude: 720, nv: 2) Coordinates: * Z (Z) float32 -5.0 -15.0 -25.0 ... -5.461e+03 -5.906e+03 Z_bnds (Z, nv) float32 nan -10.0 -10.0 ... -5.678e+03 -6.134e+03 * latitude (latitude) float32 -89.75 -89.25 -88.75 ... 89.25 89.75 latitude_bnds (latitude, nv) float32 -90.0 -89.5 -89.5 ... 89.5 89.5 90.0 * longitude (longitude) float32 -179.8 -179.2 -178.8 ... 179.2 179.8 longitude_bnds (longitude, nv) float32 -180.0 -179.5 -179.5 ... 179.5 180.0 * time (time) datetime64[ns] 2001-12-16T12:00:00 ... 2017-12-16T... time_bnds (time, nv) datetime64[ns] dask.array<chunksize=(1, 2), meta=np.ndarray> Dimensions without coordinates: nv Data variables: SALT (time, Z, latitude, longitude) float32 dask.array<chunksize=(1, 50, 280, 280), meta=np.ndarray> THETA (time, Z, latitude, longitude) float32 dask.array<chunksize=(1, 50, 280, 280), meta=np.ndarray> Attributes: (12/62) Conventions: CF-1.8, ACDD-1.3 acknowledgement: This research was carried out by the Jet... author: Ian Fenty and Ou Wang cdm_data_type: Grid comment: Fields provided on a regular lat-lon gri... coordinates_comment: Note: the global 'coordinates' attribute... ... ... time_coverage_duration: P1M time_coverage_end: 2002-01-01T00:00:00 time_coverage_resolution: P1M time_coverage_start: 2001-12-01T00:00:00 title: ECCO Ocean Temperature and Salinity - Mo... uuid: 7f718714-4159-11eb-8bbd-0cc47a3f819b
Do data masking, calculate an SST anomaly, and plot some figures
# Mask for the good data. Everything else defaults to NaN
# SST missing value 9.9692100e+36
# SSH missing value 9.9692100e+36
cond = (ssh_ds < 1000)
ssh_ds_masked = ssh_ds['SSH'].where(cond)

cond = (temp_ds < 1000)
temp_ds_masked = temp_ds['THETA'].where(cond)
# Derive an SST climatology and subtract it from the SST to create an SST anomaly
# with the seasonal cycle removed
climatology_mean = temp_ds_masked.groupby('time.month').mean('time', keep_attrs=True, skipna=False)
temp_ds_masked_anomaly = temp_ds_masked.groupby('time.month') - climatology_mean  # subtract out the long-term monthly mean
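The groupby-month subtraction is the core trick here: each calendar month is anomalized against its own long-term mean. A self-contained sanity check on synthetic data (our own illustration, using the imports from the top of the notebook) shows a pure seasonal cycle anomalizing to roughly zero:

# Build three years of a pure seasonal signal and remove its monthly climatology.
t = pd.date_range("2001-01-01", periods=36, freq="MS")
seasonal = xr.DataArray(np.sin(2 * np.pi * t.month / 12), coords={"time": t}, dims="time")
anom = seasonal.groupby("time.month") - seasonal.groupby("time.month").mean("time")
print(float(abs(anom).max()))  # ~0: the climatology absorbs the entire cycle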
fig, ax = plt.subplots(1, 3, figsize=(25, 5))

# take a slice of the Indian Ocean and plot SSH, SST, and SST anomaly
ssh_ds_masked['SSH'][6].sel(longitude=slice(40, 120), latitude=slice(-30, 20)).plot(ax=ax[0], vmin=-0.5, vmax=1.25)
temp_ds_masked['THETA'][6].sel(longitude=slice(40, 120), latitude=slice(-30, 20), Z=slice(0, -5)).plot(ax=ax[1], vmin=10, vmax=32)
temp_ds_masked_anomaly['THETA'][6].sel(longitude=slice(40, 120), latitude=slice(-30, 20), Z=slice(0, -5)).plot(ax=ax[2], vmin=-2, vmax=2)
Perform the correlations in the east and west Indian Ocean
# Western and Eastern Indian Ocean regions (WIO and EIO, respectively)
# EIO: 90–110 E, 10 S–0 N
# WIO: 50–70 E, 10 S–10 N

# Group the Eastern Indian Ocean data by month. This will make the correlation of all monthly values straightforward.
ssh_group = ssh_ds_masked['SSH'].sel(longitude=slice(90, 110), latitude=slice(-10, 0)).groupby('time.month')
#temp_group = temp_ds_masked['THETA'].sel(longitude=slice(90, 110), latitude=slice(-10, 0), Z=-5.0).drop('Z').groupby('time.month')
temp_group = temp_ds_masked_anomaly['THETA'].sel(longitude=slice(90, 110), latitude=slice(-10, 0), Z=-5.0).drop('Z').groupby('time.month')

print(" Running correlations in eastern Indian Ocean . . .\n")

corr = []
for month in range(1, 13):
    corr.append(xr.corr(ssh_group[month], temp_group[month]))
    #print("\nthe correlation in the east is: ", xr.corr(ssh_group[month], temp_group[month]).values)
# Do some plotting
fig, ax = plt.subplots(1, 2, figsize=(14, 8))

ax[0].set_title("Spatial correlation in Eastern Indian Ocean", fontsize=16)
ax[0].set_ylabel("Correlation", fontsize=16)
ax[0].set_xlabel("Month", fontsize=16)
ax[0].set_ylim([-1, 1])
ax[0].plot(corr)
# Repeat for the Western Indian Ocean
# Group the data by month. This will make the correlation of all monthly values straightforward.
ssh_group = ssh_ds_masked['SSH'].sel(longitude=slice(50, 70), latitude=slice(-10, 10)).groupby('time.month')
#temp_group = temp_ds_masked['THETA'].sel(longitude=slice(50, 70), latitude=slice(-10, 10), Z=-5.0).drop('Z').groupby('time.month')
temp_group = temp_ds_masked_anomaly['THETA'].sel(longitude=slice(50, 70), latitude=slice(-10, 10), Z=-5.0).drop('Z').groupby('time.month')

print(" Running correlations in western Indian Ocean . . .\n")

corr2 = []
for month in range(1, 13):
    corr2.append(xr.corr(ssh_group[month], temp_group[month]))
    #print("\nthe correlation in the west is: ", xr.corr(ssh_group[month], temp_group[month]).values)

ax[1].set_title("Spatial correlation in Western Indian Ocean", fontsize=16)
ax[1].set_ylabel("Correlation", fontsize=16)
ax[1].set_xlabel("Month", fontsize=16)
ax[1].set_ylim([-1, 1])
ax[1].plot(corr2)
Running correlations in eastern Indian Ocean . . .
Running correlations in western Indian Ocean . . .
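To compare the two regions side by side, the twelve monthly correlations can also be tabulated; this is a small follow-on sketch of ours, not part of the original notebook:

# Collect both regions into one table; each entry is the scalar spatial
# correlation for that calendar month across all years.
corr_df = pd.DataFrame(
    {"EIO": [float(c) for c in corr], "WIO": [float(c) for c in corr2]},
    index=pd.Index(range(1, 13), name="month"),
)
print(corr_df.round(2))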