import xarray as xr
import numpy as np
from IPython.display import display, JSON
from datetime import datetime, timedelta, time
import os
from harmony import BBox, Client, Collection, Request, Environment, LinkType
import matplotlib.pyplot as plt
import cartopy.crs as ccrs
%matplotlib inline
02. Using Harmony to Subset SWOT Simulated Data
Objectives of this Jupyter notebook
- Use a Harmony request to obtain a temporal and spatial subset of L2 altimetric data in the cloud.
- Construct and submit the Harmony request by using the harmony-py library.
- Open the data in xarray and make a simple plot to visually confirm the download and subset.
Why are we using Harmony here? https://harmony.earthdata.nasa.gov/
- The advantage of reducing data size for access/download, especially for long global time series.
- Harmony allows us to access data from different NASA DAACs in a consistent way (not DAAC-specific).
- A number of different services like spatial subset, variable subset, etc., can all be called from Harmony
Datasets of interest in this notebook
- Nadir GLORYS CalVal: https://podaac.jpl.nasa.gov/dataset/SWOT_SIMULATED_L2_NADIR_SSH_GLORYS_CALVAL_V1
- Nadir GLORYS Science: https://podaac.jpl.nasa.gov/dataset/SWOT_SIMULATED_L2_NADIR_SSH_GLORYS_SCIENCE_V1
- Nadir ECCO CalVal: https://podaac.jpl.nasa.gov/dataset/SWOT_SIMULATED_L2_NADIR_SSH_ECCO_LLC4320_CALVAL_V1
- Nadir ECCO Science: https://podaac.jpl.nasa.gov/dataset/SWOT_SIMULATED_L2_NADIR_SSH_ECCO_LLC4320_SCIENCE_V1
- KaRIn GLORYS CalVal: https://podaac.jpl.nasa.gov/dataset/SWOT_SIMULATED_L2_KARIN_SSH_GLORYS_CALVAL_V1
- KaRIn GLORYS Science: https://podaac.jpl.nasa.gov/dataset/SWOT_SIMULATED_L2_KARIN_SSH_GLORYS_SCIENCE_V1
- KaRIn ECCO CalVal: https://podaac.jpl.nasa.gov/dataset/SWOT_SIMULATED_L2_KARIN_SSH_ECCO_LLC4320_CALVAL_V1
- KaRIn ECCO Science: https://podaac.jpl.nasa.gov/dataset/SWOT_SIMULATED_L2_KARIN_SSH_ECCO_LLC4320_SCIENCE_V1
Import libraries
Note: Install harmony-py in your Python environment before you import it for the first time. See https://github.com/nasa/harmony-py (For the SWOT Ocean Cloud Workshop March 2022, the needed libraries have been pre-loaded and installed in the cloud compute environment.)
Let’s start up the client from the harmony-py library.
# Start the Harmony Client.
# "PROD" stands for production. This is the environment for users.
harmony_client = Client(env=Environment.PROD)
Temporal and spatial subset using a bounding box around CA Crossover region
- Define the collection of interest by calling Collection(id = YourCollection), where YourCollection is a collection short name or concept-id. There are a number of ways to get the collection short name; using Earthdata Search is one way - see the pre-workshop tutorial.
- Set time bounds.
- Set spatial bounding box.
- There are also other options such as variables, granules, and concatenation.
# Collection to subset, identified here by its short name.
collection = Collection(id='SWOT_SIMULATED_L2_NADIR_SSH_GLORYS_SCIENCE_V1')

# Time bounds of the subset.
start_day = datetime(2015, 4, 15, 0, 0, 0)
end_day = datetime(2015, 4, 20, 0, 0, 0)

# Build the Harmony request: one collection, a time window and a bounding box.
request = Request(
    collection=collection,
    temporal={
        'start': start_day,
        'stop': end_day,
    },
    # BBox(west, south, east, north):
    # [20-50N], [140W-100W] CA Current crossover point (35N,125W)
    spatial=BBox(-140, 20, -100, 50),
    # Other request options, left disabled in this example:
    # variables=[],
    # granule_id=granuleIDs,
    # concatenate = True,
)

# Check that the request parameters are consistent before submitting.
request.is_valid()

# Show the equivalent curl command for the request.
print(harmony_client.request_as_curl(request))

# Submit the request; the returned job id is used below to track the job
# and to download its results.
job_id = harmony_client.submit(request)
print(f'Job ID: {job_id}')  # This job id is shareable: show how to do this
View the job status
A Harmony request is limited to 200 granules. The limit is there to prevent users from accidentally making huge requests.
# Query the current status of the submitted job.
harmony_client.status(job_id)

# Wait for the job to finish processing, with progress display enabled.
harmony_client.wait_for_processing(job_id, show_progress=True)
Download subsetted files
Filenames that end with “subsetted.nc4” have been subsetted.
The other filenames (that are un-altered) indicate that these were rounded up as relevant files during Harmony’s search, but do not contain data in the actual region of interest, so the files downloaded here are empty.
# create a new folder to put the subsetted data in
os.makedirs("swot_ocean_basic_subset", exist_ok=True)

# Download every result of the job into that folder; each future resolves
# to the name of a downloaded file.
futures = harmony_client.download_all(job_id, directory='./swot_ocean_basic_subset', overwrite=True)
file_names = [f.result() for f in futures]
sorted(file_names)

from os import listdir
from os.path import isfile, join

# Keep only the files that were actually subsetted (name contains "subsetted").
data_files = [f for f in file_names if "subsetted" in f]
data_files
Take a look at the subset data
Note: xarray is a little clunky with variables in groups.
# Open all subsetted files as a single dataset, reading the 'data_01' group
# of each file and concatenating them along 'time' in sorted filename order.
ds = xr.open_mfdataset(sorted(data_files), combine='nested', concat_dim='time', group='data_01')
ds
# Plot the subset on a Plate Carree map to visually confirm the download
# and the spatial subset.
fig = plt.figure(figsize=[11, 7])
ax = plt.axes(projection=ccrs.PlateCarree())
ax.coastlines()
# Map extent as [lon_min, lon_max, lat_min, lat_max].
ax.set_extent([-150, -90, 10, 60])

# One point per measurement, colored by the depth_or_elevation variable.
plt.scatter(ds.longitude, ds.latitude, lw=1, c=ds.depth_or_elevation)
plt.colorbar(label='Depth or elevation (m)')
plt.clim(-4000, 4000)
plt.show()

# Alternative using xarray's own plotting interface:
# ds.plot.scatter( y="latitude",
#                  x="longitude",
#                  hue="depth_or_elevation",
#                  s=1,
#                  vmin=-4000,
#                  vmax=4000,
#                  levels=9,
#                  cmap="jet",
#                  aspect=2.5,
#                  size=9, )