from pystac_client import Client
from collections import defaultdict
import json
import geopandas
import geoviews as gv
from cartopy import crs
import matplotlib.pyplot as plt
from datetime import datetime
import os
import requests
import boto3
import numpy as np
import xarray as xr
import rasterio as rio
from rasterio.session import AWSSession
from rasterio.plot import show
import rioxarray
import geoviews as gv
import hvplot.xarray
import holoviews as hv
from tqdm import tqdm
from pprint import pprint
import time
gv.extension('bokeh', 'matplotlib')
From the PO.DAAC Cookbook, to access the GitHub version of the notebook, follow this link.
Estimating Reservoir Surface Area From Harmonized Landsat-Sentinel (HLS) Imagery – Local Machine Version
Author: Matthew Bonnema (JPL)
Initiate Data Search
STAC_URL = 'https://cmr.earthdata.nasa.gov/stac'
provider_cat = Client.open(STAC_URL)
catalog = Client.open(f'{STAC_URL}/LPCLOUD/')
#collections = ['HLSL30.v2.0', 'HLSS30.v2.0']
collections = ['HLSL30.v2.0']Define Date Range and Region of Interest
date_range = "2021-01/2022-01"
roi = {
"type": "Feature",
"properties": {},
"geometry": {
"type": "Polygon",
"coordinates": [
[
[
-121.60835266113281,
39.49874248613119
],
[
-121.26983642578124,
39.49874248613119
],
[
-121.26983642578124,
39.756824261131406
],
[
-121.60835266113281,
39.756824261131406
],
[
-121.60835266113281,
39.49874248613119
]
]
]
}
}['geometry']
base = gv.tile_sources.EsriImagery.opts(width=650, height=500)
Reservoir = gv.Polygons(roi['coordinates']).opts(line_color='yellow', line_width=10, color=None)
Reservoir * baseSearch for HLS imagery matching search criteria
search = catalog.search(
collections=collections,
intersects=roi,
datetime=date_range,
limit=100
)
item_collection = search.get_all_items()
search.matched()Filter imagery for low cloud images and identify image bands needed for water classification
s30_bands = ['B8A', 'B03'] # S30 bands for NDWI calculation and quality filtering -> NIR, GREEN, Quality
l30_bands = ['B05', 'B03'] # L30 bands for NDWI calculation and quality filtering -> NIR, GREEN, Quality
cloudcover = 10ndwi_band_links = []
for i in item_collection:
if i.properties['eo:cloud_cover'] <= cloudcover:
if i.collection_id == 'HLSS30.v2.0':
#print(i.properties['eo:cloud_cover'])
ndwi_bands = s30_bands
elif i.collection_id == 'HLSL30.v2.0':
#print(i.properties['eo:cloud_cover'])
ndwi_bands = l30_bands
for a in i.assets:
if any(b==a for b in ndwi_bands):
ndwi_band_links.append(i.assets[a].href)ndwi_band_links[:10]tile_dicts = defaultdict(list) for l in ndwi_band_links:
tile = l.split('.')[-6]
tile_dicts[tile].append(l)tile_dicts.keys()tile_links = tile_dicts['T10SFJ']bands_dicts = defaultdict(list)
for b in tile_links:
band = b.split('.')[-2]
bands_dicts[band].append(b)
for i in bands_dicts:
print(i)Download identified images to local computer
os.makedirs("downloads", exist_ok=True)def download(url: str, fname: str):
resp = requests.get(url, stream=True)
total = int(resp.headers.get('content-length', 0))
with open(fname, 'wb') as file, tqdm(
desc=fname,
ncols=110,
total=total,
unit='iB',
unit_scale=True,
unit_divisor=1024,
) as bar:
for data in resp.iter_content(chunk_size=1024):
size = file.write(data)
bar.update(size)path_dicts = defaultdict(list)
print('Begin Downloading Imagery')
start_time = time.time()
for key in bands_dicts:
url = bands_dicts[key]
for u in url:
filename = u.split('/')[-1]
path = './downloads/' + filename
download(u,path)
path_dicts[key].append(path)
print('Download Complete')
print("--- %s seconds ---" % (time.time() - start_time))Load images and visualize
def time_index_from_filenames(file_links):
return [datetime.strptime(f.split('.')[-5], '%Y%jT%H%M%S') for f in file_links]time = xr.Variable('time', time_index_from_filenames(path_dicts['B03']))
chunks=dict(band=1, x=512, y=512)
hls_ts_da_LB3 = xr.concat([rioxarray.open_rasterio(f, chunks=chunks).squeeze('band', drop=True) for f in path_dicts['B03']], dim=time)
hls_ts_da_LB5 = xr.concat([rioxarray.open_rasterio(f, chunks=chunks).squeeze('band', drop=True) for f in path_dicts['B05']], dim=time)
hls_ts_da_LB3 = hls_ts_da_LB3.rio.reproject("epsg:4326")
hls_ts_da_LB5 = hls_ts_da_LB5.rio.reproject("epsg:4326")hls_ts_da_data_LB3 = hls_ts_da_LB3.load()
hls_ts_da_data_LB5 = hls_ts_da_LB5.load()
hls_ts_da_data_LB3 = hls_ts_da_data_LB3.rio.clip([roi])
hls_ts_da_data_LB5 = hls_ts_da_data_LB5.rio.clip([roi])hls_ts_da_data_LB5.hvplot.image(x='x', y='y', rasterize=True, width=600, height=400, colorbar=True, cmap='gray').opts(clim=(0,2000))Caclulate Normalized Difference Water Index (NDWI) and Classify Innundated Areas
LB3 = hls_ts_da_data_LB3
LB5 = hls_ts_da_data_LB5
NDWI = (LB3-LB5)/(LB3+LB5)
NDWI.hvplot.image(x='x', y='y', rasterize=True, width=600, height=400, colorbar=True, cmap='coolwarm').opts(clim=(-0.5,0.5))water = NDWI>0
water.hvplot.image(x='x', y='y', rasterize=True, width=600, height=400, colorbar=True, cmap='PuOr').opts(clim=(0,1))Caclulate surface area of reservoir and plot time series
if water.variable.max() == True:
water_real = water*30*30
water_area = water_real.sum(axis=(1,2))
%matplotlib inline
fig, ax = plt.subplots()
(water_area[:]/1000000).plot(ax=ax, linewidth=2, linestyle = '-', marker='o')
ax.set_title("Surface area of waterbody in km2")
ax.set_ylabel('Area [km^2]')