import geopandas as gpd
import glob
from pathlib import Path
import pandas as pd
import os
import zipfile
import earthaccess
from earthaccess import Auth, DataCollections, DataGranules, Store
From the PO.DAAC Cookbook, to access the GitHub version of the notebook, follow this link.
SWOT Shapefile Data Conversion to CSV
Notebook showcasing how to merge/concatenate multiple shapefiles into a single file.
- Utilizing the merged shapefile and converting it to a csv file.
- Option to query the new dataset based on the user's choice; either ‘reach_id’ or water surface elevation (‘wse’), etc.
- Using the queried variable to export it as a csv or shapefile.
Import libraries
Before you start
Before you begin this tutorial, make sure you have an Earthdata Login account, which is required to access data from the NASA Earthdata system. Please visit https://urs.earthdata.nasa.gov to register for an Earthdata Login account. It is free to create and only takes a moment to set up.
You will also need a netrc file containing your NASA Earthdata Login credentials in order to execute this notebook. A netrc file can be created manually within a text editor and saved to your home directory. For additional information see: Authentication for NASA Earthdata
In this notebook, we will be calling the authentication in the below cell.
# Log in to NASA Earthdata. Use the interactive strategy the first time: with
# persist=True it writes a netrc file containing your credentials.
# auth = earthaccess.login(strategy="interactive", persist=True)

# If a netrc file with your NASA Earthdata credentials already exists,
# log in with strategy="netrc".
auth = earthaccess.login(strategy="netrc")
Search Common Metadata Repository (CMR) for SWOT sample data links by Shapefile
We want to find the SWOT sample files that will cross over our region of interest. For this tutorial, we use a shapefile of the United States, finding 44 total granules. Each dataset has its own unique collection ID. For the SWOT_SIMULATED_NA_CONTINENT_L2_HR_RIVERSP_V1 dataset, we can find the collection ID here.
# Search CMR for granules of the collection that intersect the bounding box,
# given as (west, south, east, north) in degrees.
results = earthaccess.search_data(
    concept_id="C2263384307-POCLOUD",
    bounding_box=(-124.848974, 24.396308, -66.885444, 49.384358),
)
Granules found: 46
Get Download links from CMR search results
# Collect the download links we want from every granule in the search results.
downloads = []
for g in results:
    for link in g.data_links():
        if 'https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/' in link:
            if 'Reach' in link:
                # if the link has "Reach" instead of "Node" in the name, we want to download it for the swath use case
                downloads.append(link)
print(len(downloads))
23
Download the Data into a folder
# Create folder to house downloaded data.
# exist_ok=True makes this a no-op when the folder is already there.
folder = Path("SWOT_sample_files")
folder.mkdir(parents=True, exist_ok=True)

# Download every selected link into the folder.
earthaccess.download(downloads, "./SWOT_sample_files")
['SWOT_L2_HR_RiverSP_Reach_007_022_NA_20220804T224145_20220804T224402_PGA0_01.zip',
'SWOT_L2_HR_RiverSP_Reach_007_037_NA_20220805T115553_20220805T120212_PGA0_01.zip',
'SWOT_L2_HR_RiverSP_Reach_007_065_NA_20220806T115630_20220806T120114_PGA0_01.zip',
'SWOT_L2_HR_RiverSP_Reach_007_104_NA_20220807T205936_20220807T210016_PGA0_01.zip',
'SWOT_L2_HR_RiverSP_Reach_007_121_NA_20220808T115628_20220808T120311_PGA0_01.zip',
'SWOT_L2_HR_RiverSP_Reach_007_132_NA_20220808T210018_20220808T210252_PGA0_01.zip',
'SWOT_L2_HR_RiverSP_Reach_007_147_NA_20220809T101525_20220809T101639_PGA0_01.zip',
'SWOT_L2_HR_RiverSP_Reach_007_162_NA_20220809T224722_20220809T225058_PGA0_01.zip',
'SWOT_L2_HR_RiverSP_Reach_007_175_NA_20220810T101607_20220810T101940_PGA0_01.zip',
'SWOT_L2_HR_RiverSP_Reach_007_177_NA_20220810T120102_20220810T120420_PGA0_01.zip',
'SWOT_L2_HR_RiverSP_Reach_007_203_NA_20220811T101614_20220811T102211_PGA0_01.zip',
'SWOT_L2_HR_RiverSP_Reach_007_205_NA_20220811T120350_20220811T120457_PGA0_01.zip',
'SWOT_L2_HR_RiverSP_Reach_007_287_NA_20220814T101759_20220814T102333_PGA0_01.zip',
'SWOT_L2_HR_RiverSP_Reach_007_300_NA_20220814T210504_20220814T210907_PGA0_01.zip',
'SWOT_L2_HR_RiverSP_Reach_007_315_NA_20220815T101757_20220815T102414_PGA0_01.zip',
'SWOT_L2_HR_RiverSP_Reach_007_343_NA_20220816T101844_20220816T102323_PGA0_01.zip',
'SWOT_L2_HR_RiverSP_Reach_007_371_NA_20220817T101846_20220817T102530_PGA0_01.zip',
'SWOT_L2_HR_RiverSP_Reach_007_427_NA_20220819T101956_20220819T102559_PGA0_01.zip',
'SWOT_L2_HR_RiverSP_Reach_007_440_NA_20220819T210905_20220819T211311_PGA0_01.zip',
'SWOT_L2_HR_RiverSP_Reach_007_453_NA_20220820T083815_20220820T084053_PGA0_01.zip',
'SWOT_L2_HR_RiverSP_Reach_007_468_NA_20220820T211105_20220820T211330_PGA0_01.zip',
'SWOT_L2_HR_RiverSP_Reach_007_483_NA_20220821T102527_20220821T102706_PGA0_01.zip',
'SWOT_L2_HR_RiverSP_Reach_007_522_NA_20220822T192441_20220822T193037_PGA0_01.zip']
Unzip shapefiles in existing folder
for item in os.listdir(folder):  # loop through items in dir
    if item.endswith(".zip"):  # check for ".zip" extension
        # The context manager closes the archive even if extraction fails.
        with zipfile.ZipFile(f"{folder}/{item}") as zip_ref:  # create zipfile object
            zip_ref.extractall(folder)  # extract file to dir
Merging two separate shapefiles into one
# Read shapefiles
SWOT_1 = gpd.read_file(folder / 'SWOT_L2_HR_RiverSP_Reach_007_037_NA_20220805T115553_20220805T120212_PGA0_01.shp')
SWOT_2 = gpd.read_file(folder / 'SWOT_L2_HR_RiverSP_Reach_007_065_NA_20220806T115630_20220806T120114_PGA0_01.shp')

# Merge/Combine multiple shapefiles into one.
# Use the pandas module imported by this file rather than reaching it
# through the geopandas namespace.
SWOT_Merge = pd.concat([SWOT_1, SWOT_2])

# Export merged geodataframe into shapefile
SWOT_Merge.to_file(folder / 'SWOT_Merge.shp')
/Users/walschots/anaconda3/lib/python3.10/site-packages/geopandas/io/file.py:299: FutureWarning: pandas.Int64Index is deprecated and will be removed from pandas in a future version. Use pandas.Index with the appropriate dtype instead.
pd.Int64Index,
Merging multiple shapefiles from within a folder
# State the filename pattern to look for within folder, in this case the .shp
# shapefiles of the downloaded Reach granules. Matching on the granule prefix
# (rather than every "*.shp") keeps merged outputs written to this same folder,
# such as SWOT_Merge.shp or SWOTReaches.shp from a previous run, from being
# read back in and duplicating rows.
shapefiles = sorted(folder.glob("SWOT_L2_HR_RiverSP_Reach_*.shp"))

# Merge/Combine multiple shapefiles in folder into one
gdf = pd.concat([
    gpd.read_file(shp) for shp in shapefiles
]).pipe(gpd.GeoDataFrame)

# Export merged geodataframe into shapefile
gdf.to_file(folder / 'SWOTReaches.shp')
/Users/walschots/anaconda3/lib/python3.10/site-packages/geopandas/io/file.py:299: FutureWarning: pandas.Int64Index is deprecated and will be removed from pandas in a future version. Use pandas.Index with the appropriate dtype instead.
pd.Int64Index,
Converting to CSV
Converting merged geodataframe into a csv file.
# Write the merged geodataframe to a CSV file inside the data folder.
gdf.to_csv(folder / 'csvmerge.csv')
Querying a Shapefile
If you want to search for a specific reach id or a specific length of river reach that is possible through a spatial query using Geopandas.
Utilizing comparison operators (>, <, ==, >=, <=).
You can zoom into a particular river reach by specifying its reach_id, or look for duplicate overlapping river reaches.
# Select the rows whose reach_id equals the given ID (compared as a string).
reach = gdf.query("reach_id == '74292500301'")
reach
reach_id | time | time_tai | time_str | p_lat | p_lon | river_name | wse | wse_u | wse_r_u | ... | p_width | p_wid_var | p_n_nodes | p_dist_out | p_length | p_maf | p_dam_id | p_n_ch_max | p_n_ch_mod | geometry | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
51 | 74292500301 | -1.000000e+12 | -1.000000e+12 | no_data | 40.063235 | -98.551296 | no_data | -1.000000e+12 | -1.000000e+12 | -1.000000e+12 | ... | 54.0 | 387.837794 | 47 | 3200409.359 | 9496.587434 | -1.000000e+12 | 0 | 2 | 1 | LINESTRING (-98.50490 40.06789, -98.50525 40.0... |
308 | 74292500301 | -1.000000e+12 | -1.000000e+12 | no_data | 40.063235 | -98.551296 | no_data | -1.000000e+12 | -1.000000e+12 | -1.000000e+12 | ... | 54.0 | 387.837794 | 47 | 3200409.359 | 9496.587434 | -1.000000e+12 | 0 | 2 | 1 | LINESTRING (-98.50490 40.06789, -98.50525 40.0... |
308 | 74292500301 | -1.000000e+12 | -1.000000e+12 | no_data | 40.063235 | -98.551296 | no_data | -1.000000e+12 | -1.000000e+12 | -1.000000e+12 | ... | 54.0 | 387.837794 | 47 | 3200409.359 | 9496.587434 | -1.000000e+12 | 0 | 2 | 1 | LINESTRING (-98.50490 40.06789, -98.50525 40.0... |
262 | 74292500301 | -1.000000e+12 | -1.000000e+12 | no_data | 40.063235 | -98.551296 | no_data | -1.000000e+12 | -1.000000e+12 | -1.000000e+12 | ... | 54.0 | 387.837794 | 47 | 3200409.359 | 9496.587434 | -1.000000e+12 | 0 | 2 | 1 | LINESTRING (-98.50490 40.06789, -98.50525 40.0... |
2 | 74292500301 | -1.000000e+12 | -1.000000e+12 | no_data | 40.063235 | -98.551296 | no_data | -1.000000e+12 | -1.000000e+12 | -1.000000e+12 | ... | 54.0 | 387.837794 | 47 | 3200409.359 | 9496.587434 | -1.000000e+12 | 0 | 2 | 1 | LINESTRING (-98.50490 40.06789, -98.50525 40.0... |
5 rows × 111 columns
# Select the rows whose 'wse' value is greater than 75.
WSE = gdf.query('wse > 75')
WSE
reach_id | time | time_tai | time_str | p_lat | p_lon | river_name | wse | wse_u | wse_r_u | ... | p_width | p_wid_var | p_n_nodes | p_dist_out | p_length | p_maf | p_dam_id | p_n_ch_max | p_n_ch_mod | geometry | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
263 | 77158000011 | 7.132750e+08 | 7.132750e+08 | 2022-08-08T11:5628Z | 25.297171 | -108.473158 | no_data | 123.71461 | -1.000000e+12 | 0.00000 | ... | 69.5 | 1719.195048 | 49 | 9731.610 | 9731.609922 | -1.000000e+12 | 0 | 2 | 1 | LINESTRING (-108.49317 25.28405, -108.49287 25... |
116 | 71386000311 | 7.132212e+08 | 7.132212e+08 | 2022-08-07T21:0013Z | 48.483550 | -82.856510 | no_data | 111.30161 | -1.000000e+12 | 30.96656 | ... | 45.0 | 285.153070 | 75 | 464346.340 | 15080.667224 | -1.000000e+12 | 0 | 1 | 1 | LINESTRING (-82.87880 48.52825, -82.87919 48.5... |
119 | 73282800021 | 7.134418e+08 | 7.134418e+08 | 2022-08-10T10:1658Z | 33.634414 | -87.209808 | no_data | 88.18387 | -1.000000e+12 | 4.26350 | ... | 211.5 | 3285.033201 | 57 | 687962.665 | 11346.636403 | -1.000000e+12 | 0 | 2 | 1 | LINESTRING (-87.23478 33.62552, -87.23452 33.6... |
630 | 74267700121 | 7.134419e+08 | 7.134419e+08 | 2022-08-10T10:1834Z | 38.778477 | -84.107260 | no_data | 134.81383 | -1.000000e+12 | 2.68570 | ... | 669.0 | 2311.101872 | 57 | 2560861.191 | 11466.933285 | -1.000000e+12 | 0 | 2 | 1 | LINESTRING (-84.17021 38.79320, -84.16986 38.7... |
34 | 73290000041 | 7.145118e+08 | 7.145118e+08 | 2022-08-22T19:3017Z | 30.597928 | -88.626436 | no_data | 118.64166 | -1.000000e+12 | 15.47494 | ... | 105.0 | 754.311517 | 52 | 67960.800 | 10424.745294 | -1.000000e+12 | 0 | 2 | 1 | LINESTRING (-88.60566 30.58840, -88.60597 30.5... |
242 | 74253000021 | 7.145118e+08 | 7.145117e+08 | 2022-08-22T19:2912Z | 34.018836 | -90.967538 | no_data | 91.37639 | -1.000000e+12 | 4.93354 | ... | 968.0 | 67506.844891 | 50 | 1108109.937 | 9988.011659 | -1.000000e+12 | 0 | 4 | 1 | LINESTRING (-91.01678 33.99997, -91.01645 34.0... |
658 | 74291500071 | 7.145117e+08 | 7.145116e+08 | 2022-08-22T19:2746Z | 38.843434 | -92.441821 | no_data | 76.10944 | -1.000000e+12 | 9.76231 | ... | 408.0 | 4018.894985 | 59 | 2280021.224 | 11890.852274 | -1.000000e+12 | 0 | 2 | 1 | LINESTRING (-92.39134 38.81822, -92.39168 38.8... |
7 rows × 111 columns
Converting to CSV
Converting queried variable into a csv file.
# Write each queried subset to its own CSV file inside the data folder.
reach.to_csv(folder / 'reach.csv')
WSE.to_csv(folder / 'WSE.csv')