From the PO.DAAC Cookbook, to access the GitHub version of the notebook, follow this link.

Estimating Reservoir Surface Area From Harmonized Landsat-Sentinel (HLS) Imagery – Local Machine Version

Author: Matthew Bonnema (JPL)

from pystac_client import Client  
from collections import defaultdict    
import json
import geopandas
import geoviews as gv
from cartopy import crs
import matplotlib.pyplot as plt
from datetime import datetime
import os
import requests
import boto3
import numpy as np
import xarray as xr
import rasterio as rio
from rasterio.session import AWSSession
from rasterio.plot import show
import rioxarray
import geoviews as gv
import hvplot.xarray
import holoviews as hv
from tqdm import tqdm
from pprint import pprint
import time
gv.extension('bokeh', 'matplotlib')

Define Date Range and Region of Interest

date_range = "2021-01/2022-01"
roi = {
      "type": "Feature",
      "properties": {},
      "geometry": {
        "type": "Polygon",
        "coordinates": [
          [
            [
              -121.60835266113281,
              39.49874248613119
            ],
            [
              -121.26983642578124,
              39.49874248613119
            ],
            [
              -121.26983642578124,
              39.756824261131406
            ],
            [
              -121.60835266113281,
              39.756824261131406
            ],
            [
              -121.60835266113281,
              39.49874248613119
            ]
          ]
        ]
      }
    }['geometry']
base = gv.tile_sources.EsriImagery.opts(width=650, height=500)
Reservoir = gv.Polygons(roi['coordinates']).opts(line_color='yellow', line_width=10, color=None)
Reservoir * base

Search for HLS imagery matching search criteria

search = catalog.search(
    collections=collections,
    intersects=roi,
    datetime=date_range,
    limit=100
)

item_collection = search.get_all_items()
search.matched()

Filter imagery for low cloud images and identify image bands needed for water classification

s30_bands = ['B8A', 'B03']    # S30 bands for NDWI calculation and quality filtering -> NIR, GREEN, Quality 
l30_bands = ['B05', 'B03']    # L30 bands for NDWI calculation and quality filtering -> NIR, GREEN, Quality 
cloudcover = 10
ndwi_band_links = []

for i in item_collection:
    if i.properties['eo:cloud_cover'] <= cloudcover:
        if i.collection_id == 'HLSS30.v2.0':
            #print(i.properties['eo:cloud_cover'])
            ndwi_bands = s30_bands
        elif i.collection_id == 'HLSL30.v2.0':
            #print(i.properties['eo:cloud_cover'])
            ndwi_bands = l30_bands

        for a in i.assets:
            if any(b==a for b in ndwi_bands):
                ndwi_band_links.append(i.assets[a].href)
ndwi_band_links[:10]
tile_dicts = defaultdict(list)  
for l in ndwi_band_links:
    tile = l.split('.')[-6]
    tile_dicts[tile].append(l)
tile_dicts.keys()
tile_links = tile_dicts['T10SFJ']
bands_dicts = defaultdict(list)
for b in tile_links:
    band = b.split('.')[-2]
    bands_dicts[band].append(b)
for i in bands_dicts:
    print(i)

Download identified images to local computer

os.makedirs("downloads", exist_ok=True)
def download(url: str, fname: str):
    resp = requests.get(url, stream=True)
    total = int(resp.headers.get('content-length', 0))
    with open(fname, 'wb') as file, tqdm(
        desc=fname,
        ncols=110,
        total=total,
        unit='iB',
        unit_scale=True,
        unit_divisor=1024,
    ) as bar:
        for data in resp.iter_content(chunk_size=1024):
            size = file.write(data)
            bar.update(size)
path_dicts = defaultdict(list)
print('Begin Downloading Imagery')
start_time = time.time()
for key in bands_dicts:
    url = bands_dicts[key]
    for u in url:
        filename = u.split('/')[-1]
        path = './downloads/' + filename
        download(u,path)
        path_dicts[key].append(path)
print('Download Complete')
print("--- %s seconds ---" % (time.time() - start_time))

Load images and visualize

def time_index_from_filenames(file_links):
    return [datetime.strptime(f.split('.')[-5], '%Y%jT%H%M%S') for f in file_links]
time = xr.Variable('time', time_index_from_filenames(path_dicts['B03']))
chunks=dict(band=1, x=512, y=512)
hls_ts_da_LB3 = xr.concat([rioxarray.open_rasterio(f, chunks=chunks).squeeze('band', drop=True) for f in path_dicts['B03']], dim=time)
hls_ts_da_LB5 = xr.concat([rioxarray.open_rasterio(f, chunks=chunks).squeeze('band', drop=True) for f in path_dicts['B05']], dim=time)
hls_ts_da_LB3 = hls_ts_da_LB3.rio.reproject("epsg:4326")
hls_ts_da_LB5 = hls_ts_da_LB5.rio.reproject("epsg:4326")
hls_ts_da_data_LB3 = hls_ts_da_LB3.load()
hls_ts_da_data_LB5 = hls_ts_da_LB5.load()
hls_ts_da_data_LB3 = hls_ts_da_data_LB3.rio.clip([roi])
hls_ts_da_data_LB5 = hls_ts_da_data_LB5.rio.clip([roi])
hls_ts_da_data_LB5.hvplot.image(x='x', y='y', rasterize=True, width=600, height=400, colorbar=True, cmap='gray').opts(clim=(0,2000))

Caclulate Normalized Difference Water Index (NDWI) and Classify Innundated Areas

LB3 = hls_ts_da_data_LB3 
LB5 = hls_ts_da_data_LB5
NDWI = (LB3-LB5)/(LB3+LB5)
NDWI.hvplot.image(x='x', y='y', rasterize=True, width=600, height=400, colorbar=True, cmap='coolwarm').opts(clim=(-0.5,0.5))
water = NDWI>0
water.hvplot.image(x='x', y='y', rasterize=True, width=600, height=400, colorbar=True, cmap='PuOr').opts(clim=(0,1))

Caclulate surface area of reservoir and plot time series

if water.variable.max() == True:
    water_real = water*30*30
water_area = water_real.sum(axis=(1,2))

%matplotlib inline

fig, ax = plt.subplots()
(water_area[:]/1000000).plot(ax=ax, linewidth=2, linestyle = '-',  marker='o')
ax.set_title("Surface area of waterbody in km2")
ax.set_ylabel('Area [km^2]')