# Using the `combined_qa` asset
This notebook demonstrates the process of:

- Searching a collection to intersect a point location between a start date and end date
- Examining the `combined_qa` asset alongside the `lst` asset
- Describing the values present in the `combined_qa` asset
- Masking the `lst` asset with values from the `combined_qa` asset
Let’s import the required packages.
import json
import requests
import pystac
from pystac_client import Client
from pprint import pprint
from shapely.geometry import box, mapping, Point, Polygon
from pyproj.crs import CRS
import geopandas as gpd
import xarray as xr
import rioxarray as rxr
import rasterio as rio
from matplotlib import pyplot as plt
import numpy as np
import os
import base64
from mpl_toolkits.axes_grid1 import make_axes_locatable
import matplotlib as mpl
# suppress deprecation and future warnings
import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning)
warnings.filterwarnings("ignore", category=FutureWarning)
We’ll define a helper function to plot the `lst` and `combined_qa` assets side-by-side, optionally masking or keeping particular QA values. This function also exists in `FH_Hydrosat`, but is included here for descriptive purposes.
def plot_lst_and_qa(hdst_item, mask_val=None, keep_val=None):
    # read the combined_qa asset
    mask_href = hdst_item.to_dict()['assets']['combined_qa']['href']
    with rio.open(mask_href) as src:
        qa = src.read(1)

    # read the lst asset
    lst_href = hdst_item.to_dict()['assets']['lst']['href']
    with rio.open(lst_href) as src:
        lst = src.read(1)

    ## apply masking or keeping of values
    # if a mask_val was provided, use it to set those pixels to np.nan so they won't be used
    # for analysis or visualization. This can be done with numpy's `where` function, which operates as
    # np.where(condition, value_if_true, value_if_false).
    # if there are multiple values in a list to mask, we mask each one sequentially.
    if mask_val is not None:
        if isinstance(mask_val, list):
            # if a list of mask values was provided, sequentially set the lst data
            # where the QA data matches each value to np.nan
            for mv in mask_val:
                lst = np.where(qa == mv, np.nan, lst)
        elif isinstance(mask_val, float):
            # if only one value was supplied to mask, use np.where() to set the lst data
            # to np.nan where that condition is true
            lst = np.where(qa == mask_val, np.nan, lst)
        else:
            raise ValueError("mask_val must be either float or list of floats")

    # if a keep_val was provided, use it to set pixels NOT equal to those values to np.nan so
    # they won't be used for analysis or visualization. This can be done with boolean logic and
    # indexing directly into the array. if there are multiple values, we sequentially OR the mask
    # as more values are used.
    if keep_val is not None:
        if isinstance(keep_val, list):
            # if it's a list, there should be multiple values. iterate through them.
            for i, kv in enumerate(keep_val):
                # build the keep mask
                if i == 0:
                    keep_mask = qa == kv
                else:
                    keep_mask = keep_mask | (qa == kv)
            # take the complement of the keep_mask to retain only lst data where the QA values
            # match the provided list.
            lst[~keep_mask] = np.nan
        elif isinstance(keep_val, float):
            # if only one value was provided to keep, set the lst data not equal to that value to np.nan
            lst[qa != keep_val] = np.nan
        else:
            raise ValueError("keep_val must be either float or list of floats")

    ## display the data side-by-side
    fig, ax = plt.subplots(1, 2, figsize=(10, 5))
    im = ax[0].imshow(lst, cmap='inferno')
    ax[0].set_title('HDST')
    divider = make_axes_locatable(ax[0])
    cax = divider.append_axes('right', size='5%', pad=0.05)
    fig.colorbar(im, cax=cax, orientation='vertical')

    #### make a unique-value colormap for the QA data
    cmap = plt.cm.cividis  # define the colormap
    # extract all colors from the cmap
    cmaplist = [cmap(i) for i in range(cmap.N)]
    # create the new map
    cmap = mpl.colors.LinearSegmentedColormap.from_list(
        'Custom cmap', cmaplist, cmap.N)
    # define the bins and normalize
    num_vals = int(np.unique(qa).shape[0])
    bounds = np.linspace(0, num_vals, num_vals)
    norm = mpl.colors.BoundaryNorm(bounds, cmap.N)

    im = ax[1].imshow(qa, cmap=cmap, norm=norm)
    ax[1].set_title('COMBINED_QA')
    divider = make_axes_locatable(ax[1])
    cax = divider.append_axes('right', size='5%', pad=0.05)
    cbar = mpl.colorbar.ColorbarBase(cax, cmap=cmap, norm=norm, spacing='proportional',
                                     ticks=bounds + 0.5, boundaries=bounds, format='%1i')
    cbar.ax.set_yticklabels(np.unique(qa))

    [a.axis('off') for a in ax]
    plt.show()
    return
If you see `ERROR 1: PROJ: proj_create_from_database: Open of /opt/conda/share/proj failed`, do not fret, and proceed.
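If you would rather silence the message, one common workaround (an assumption on our part, not something this notebook requires) is to point PROJ at the data directory bundled with pyproj:

import os
from pyproj import datadir

# point PROJ at pyproj's bundled data directory (assumed workaround;
# adjust for your environment if the path differs)
os.environ["PROJ_LIB"] = datadir.get_data_dir()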
This next cell opens a file `creds.json`, which you will need to create in the same directory as the notebook and update with your username and password. The format of the file should be:

{
    "username": "your_username",
    "password": "your_password"
}
with open('creds.json') as f:
    creds = json.loads(f.read())
This next cell will base64-encode the `username:password` combination and use it to authorize access to the STAC API at the `cat_url` endpoint.
userpass = f"{creds['username']}:{creds['password']}"
b64 = base64.b64encode(userpass.encode()).decode()
headers = {'Authorization':'Basic ' + b64}
cat_url = 'https://stac.hydrosat.com'
catalog = Client.open(cat_url, headers=headers)
We’ll search for data in the `starfm_predictions_modis_landsat` and `pydms_sharpened_landsat` collections which intersect a point location between a start date and an end date, and print out the number of items.
geom = {'type': 'Point', 'coordinates': [-120.21102905273436,36.535019209789]} # Point for Hanford, CA, USA
collections = ["starfm_predictions_modis_landsat", "pydms_sharpened_landsat"]
start_date = "2021-08-17T00:00:00Z"
end_date = "2021-10-30T00:00:00Z"
search = catalog.search(
    collections=collections,
    intersects=geom,
    datetime=[start_date, end_date],
    max_items=500
)
# items = list(search.items()) # for pystac-client >= 0.4.0
items = list(search.get_all_items()) # for pystac-client < 0.4.0
items.reverse() # make the results ascending in time
len(items)
65
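As a quick sanity check (an aside, not in the original notebook, using only standard pystac Item attributes), we can peek at the first few items returned:

# inspect the id and timestamp of the first few items
for it in items[:3]:
    print(it.id, it.datetime)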
This next cell will determine whether the data returned covers more than a single MGRS tile. If it does, choose one of the tiles to subset the list of returned items.
mgrs_tiles = []
for i in items:
    for l in i.to_dict()['links']:
        if 'element84' in l['href']:
            mgrs_tiles.append(l['href'].split(r'/')[-1].split('_')[1])

print(f'number of tiles in query: {len(set(mgrs_tiles))}, {set(mgrs_tiles)}')

# if there is more than one tile, uncomment and execute this next line to choose the MGRS tile you are interested in
# items = [i for i in items if mgrs_tiles[0] in i.id]
number of tiles in query: 1, {'10SGF'}
## Applying a mask to a single asset
We’ll choose an arbitrary dataset from the results to demonstrate the contents of the `combined_qa` asset in relation to the HDST asset. We provide a function to unpack the values from the `combined_qa` asset, but the user is referred to our documentation for the full list of possible values.
item_num = 1
plot_lst_and_qa(items[item_num])
from FH_Hydrosat import unpack_qa_value

# read in the combined_qa asset
qa_href = items[item_num].to_dict()['assets']['combined_qa']['href']
with rio.open(qa_href) as src:
    qa = src.read(1)

# unpack the value and extract the qa label
for val in np.unique(qa):
    print(f"{val}: {unpack_qa_value(val)}")
0.0: ['valid pixel in all inputs']
1.0: ['Prepared High-Resolution t0']
4.0: ['Prepared Low-Resolution t0']
256.0: ['Fused t1']
257.0: ['Prepared High-Resolution t0', 'Fused t1']
260.0: ['Prepared Low-Resolution t0', 'Fused t1']
273.0: ['Prepared High-Resolution t0', 'Sharpened High-Resolution t0', 'Fused t1']
320.0: ['Sharpened Low-Resolution t0', 'Fused t1']
324.0: ['Prepared Low-Resolution t0', 'Sharpened Low-Resolution t0', 'Fused t1']
384.0: ['Sharpened Low-Resolution t1', 'Fused t1']
388.0: ['Prepared Low-Resolution t0', 'Sharpened Low-Resolution t1', 'Fused t1']
448.0: ['Sharpened Low-Resolution t0', 'Sharpened Low-Resolution t1', 'Fused t1']
452.0: ['Prepared Low-Resolution t0', 'Sharpened Low-Resolution t0', 'Sharpened Low-Resolution t1', 'Fused t1']
This says we have mostly valid data across the inputs (0.0), but in some areas we have missing data in the high-resolution LST input at time t0 (1.0) and the low-resolution LST input at t0 (4.0). Generally, the more input datasets contributing to the HDST data that have NoData, the higher the value in the `combined_qa` asset.
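The values above suggest that `combined_qa` is a sum of per-input bit flags: for example, 257 = 256 + 1 combines 'Fused t1' with 'Prepared High-Resolution t0'. As a rough illustration only, here is a minimal sketch of how such values could be decomposed, with a flag table inferred from the printed output; `unpack_qa_value` from `FH_Hydrosat` remains the authoritative reference.

# Hypothetical flag table inferred from the unpacked values printed above;
# rely on FH_Hydrosat.unpack_qa_value for the authoritative mapping.
QA_FLAGS = {
    1: 'Prepared High-Resolution t0',
    4: 'Prepared Low-Resolution t0',
    8: 'Prepared Low-Resolution t1',
    16: 'Sharpened High-Resolution t0',
    64: 'Sharpened Low-Resolution t0',
    128: 'Sharpened Low-Resolution t1',
    256: 'Fused t1',
}

def unpack_qa_value_sketch(val):
    """Decompose a combined_qa value into the labels of its set bit flags (sketch)."""
    v = int(val)
    if v == 0:
        return ['valid pixel in all inputs']
    return [label for flag, label in QA_FLAGS.items() if v & flag]

print(unpack_qa_value_sketch(257.0))  # ['Prepared High-Resolution t0', 'Fused t1']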
We can filter out pixels in the HDST data which do not come from clear observations in all inputs by keeping only LST pixels where `combined_qa == 0.0`.
plot_lst_and_qa(items[item_num], keep_val=0.0)
This shows us that we have a swath of data missing on the western side of tile 10SGF for this particular item, which is often due to the orbit of Landsat and how its swath is clipped within the MGRS tile. If we want to keep data that may have been missing in the MODIS input LST (1.0) and Landsat input LST (4.0), we can do that as follows:
plot_lst_and_qa(items[item_num], keep_val=[0.0, 1.0, 4.0])
Alternatively, we can mask pixels in the LST where the `combined_qa` asset tells us we had poor input data, i.e. everywhere besides the prepped high-resolution and low-resolution inputs. We can extract the values from the `combined_qa` asset and pass that list as the `mask_val` argument to our helper function. This will result in the same data as above.
qa_vals = np.unique(qa) # from above
qa_vals_to_mask = list(qa_vals[qa_vals > 4.0])
print(qa_vals_to_mask)
[256.0, 257.0, 260.0, 273.0, 320.0, 324.0, 384.0, 388.0, 448.0, 452.0]
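As a quick check (an aside, not in the original notebook), this list is exactly the complement of keeping [0.0, 1.0, 4.0] among the values present in this item:

# masking everything above 4.0 is the complement of keeping [0.0, 1.0, 4.0]
assert set(qa_vals_to_mask) == set(qa_vals) - {0.0, 1.0, 4.0}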
plot_lst_and_qa(items[item_num], mask_val=qa_vals_to_mask)
## More Advanced Application: Applying the mask to a time series of data
Now we’ll pass the first 25 items to the `FH_Hydrosat` class and stack the items into an xarray `DataArray` for both the `lst` and `combined_qa` assets.
from FH_Hydrosat import FH_Hydrosat
hdst_res = FH_Hydrosat(items[:25]) # lst is the default asset
hdst_stacked_res = hdst_res.stack()
hdst_ds = hdst_stacked_res.ds.sortby('time')
hdst_ds
<xarray.DataArray (time: 25, band: 1, y: 5490, x: 5490)>
dask.array<concatenate, shape=(25, 1, 5490, 5490), dtype=float32, chunksize=(1, 1, 2048, 2048), chunktype=numpy.ndarray>
Coordinates:
  * band         (band) int32 1
  * x            (x) float64 7e+05 7e+05 7e+05 ... 8.097e+05 8.097e+05 8.098e+05
  * y            (y) float64 4.1e+06 4.1e+06 4.1e+06 ... 3.99e+06 3.99e+06
    spatial_ref  int32 0
  * time         (time) object 2021-08-17T11:59:59.500000+00:00 ... 2021-09-1...
Attributes:
    _FillValue:    nan
    scale_factor:  1.0
    add_offset:    0.0
qa_res = FH_Hydrosat(items[:25], asset='combined_qa')
qa_stacked_res = qa_res.stack()
qa_ds = qa_stacked_res.ds.sortby('time')
qa_ds
<xarray.DataArray (time: 25, band: 1, y: 5490, x: 5490)>
dask.array<concatenate, shape=(25, 1, 5490, 5490), dtype=float32, chunksize=(1, 1, 2048, 2048), chunktype=numpy.ndarray>
Coordinates:
  * band         (band) int32 1
  * x            (x) float64 7e+05 7e+05 7e+05 ... 8.097e+05 8.097e+05 8.098e+05
  * y            (y) float64 4.1e+06 4.1e+06 4.1e+06 ... 3.99e+06 3.99e+06
    spatial_ref  int32 0
  * time         (time) object 2021-08-17T11:59:59.500000+00:00 ... 2021-09-1...
Attributes:
    _FillValue:    nan
    scale_factor:  1.0
    add_offset:    0.0
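Each stacked `DataArray` is loaded lazily with dask. As a quick aside (not in the original notebook), we can check how much memory the full array would need without triggering a compute:

# 25 dates x 1 band x 5490 x 5490 float32 pixels per asset
print(f"{hdst_ds.nbytes / 1e9:.1f} GB")  # roughly 3.0 GB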
The `DataArray` is quite large if we try to access all of the data. For ease of computation, we’ll subset the `DataArray` by a polygon, generated by creating a rectangular buffer around the point location extending 1 km on either side.
p_geom = Point(geom['coordinates'][0], geom['coordinates'][1])
point_df = gpd.GeoDataFrame({'geometry':[p_geom]}, crs=CRS.from_epsg(4326))
raster_crs = CRS.from_wkt(hdst_ds.spatial_ref.crs_wkt)
buffer_dist = 1000 # 1km in local UTM zone
poly_df = point_df.to_crs(raster_crs).buffer(buffer_dist, cap_style = 3) # square buffer
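As a quick sanity check (an aside, not in the original notebook), the square buffer should be about 2 km on a side, i.e. roughly 4 km²:

# buffer area in km^2: a 1 km buffer on either side gives a 2 km x 2 km square
print(poly_df.area.iloc[0] / 1e6)  # ~4.0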
Let’s plot the polygon on a Folium map so we can see where we are extracting the data.
import folium

# Use WGS 84 (epsg:4326) as the geographic coordinate system
df = gpd.GeoDataFrame(poly_df.to_crs(epsg=4326))

m = folium.Map(location=[p_geom.y, p_geom.x], zoom_start=13, tiles='CartoDB positron')

# add the polygon and centroid
for _, r in df.iterrows():
    # Without simplifying the representation of each polygon,
    # the map might not be displayed
    #sim_geo = gpd.GeoSeries(r['geometry']).simplify(tolerance=0.001)
    sim_geo = r[0].simplify(tolerance=0.001)
    geo_j = gpd.GeoSeries(sim_geo).to_json()
    geo_j = folium.GeoJson(data=geo_j,
                           style_function=lambda x: {'fillColor': 'orange'})
    geo_j.add_to(m)
    lat = sim_geo.centroid.y
    lon = sim_geo.centroid.x
    folium.Marker(location=[lat, lon]).add_to(m)

m
Now let’s clip both the HDST and QA datasets with the geometry using `rioxarray`’s `rio` accessor.
from FH_Hydrosat import FH_StackedDataset
# clip the raster dataset and wrap it in a class with slightly more functionality
hdst_clipped = FH_StackedDataset(hdst_ds.rio.clip(poly_df.geometry))
hdst_ds_clip = hdst_clipped.ds
qa_clipped = FH_StackedDataset(qa_ds.rio.clip(poly_df.geometry))
qa_ds_clip = qa_clipped.ds
Now that we have a smaller `DataArray`, let’s plot the HDST and QA contents along the `time` dimension using `xarray`’s plot utility.
hdst_ds_clip.plot(x='x', y='y', col='time', col_wrap=5, cmap='inferno');
qa_ds_clip.plot(x='x', y='y', col='time', col_wrap=5, cmap='cividis');
For this small area, the `combined_qa` asset is telling us we have good data on all but one date, 2021-09-09. Let’s check the QA values on each date of this clipped data.
for date, sub_qa in qa_ds_clip.groupby('time'):
    print(date)
    print(np.unique(sub_qa.values))
2021-08-17 11:59:59.500000+00:00
[0.]
2021-08-18 11:59:59.500000+00:00
[0.]
2021-08-19 11:59:59.500000+00:00
[0.]
2021-08-20 11:59:59.500000+00:00
[0.]
2021-08-21 18:40:16.415650+00:00
[0.]
2021-08-22 11:59:59.500000+00:00
[0.]
2021-08-23 11:59:59.500000+00:00
[0.]
2021-08-24 11:59:59.500000+00:00
[0.]
2021-08-25 11:59:59.500000+00:00
[0.]
2021-08-26 11:59:59.500000+00:00
[0.]
2021-08-27 11:59:59.500000+00:00
[0.]
2021-08-28 11:59:59.500000+00:00
[0.]
2021-08-29 11:59:59.500000+00:00
[0.]
2021-08-30 18:33:56.253740+00:00
[0.]
2021-08-31 11:59:59.500000+00:00
[0.]
2021-09-01 11:59:59.500000+00:00
[0.]
2021-09-02 11:59:59.500000+00:00
[0.]
2021-09-03 11:59:59.500000+00:00
[0.]
2021-09-04 11:59:59.500000+00:00
[0.]
2021-09-05 11:59:59.500000+00:00
[0.]
2021-09-06 18:40:21.164950+00:00
[0.]
2021-09-07 11:59:59.500000+00:00
[0.]
2021-09-08 11:59:59.500000+00:00
[0.]
2021-09-09 11:59:59.500000+00:00
[0. 8.]
2021-09-10 11:59:59.500000+00:00
[0.]
In this dataset, we have all clear observations from the inputs over this point location except on 2021-09-09, where we have a poor input from the coarse-resolution data (QA value 8.0). Thankfully, the Fusion Hub processing was able to provide data where it would otherwise be missing from the source data.
unpack_qa_value(8.)
['Prepared Low-Resolution t1']
## Plot a time series for the point location
With the same `DataArray`, we can extract the pixel values which intersect a point location. Let’s use the same point we used to search the STAC catalog and plot it.
centroid = poly_df.geometry[0].centroid
set_x, set_y, pixtype = (centroid.x, centroid.y, 'Center Pixel') # change 'Center Pixel' for plot title
ax = hdst_ds_clip.plot(x='x', y='y', col='time', col_wrap=5, cmap='inferno')
ax.set_xlabels('Easting [m]')
ax.set_ylabels('Northing [m]')
ax.map(lambda: plt.plot(set_x, set_y, markersize=20, marker=".", color="c"))
plt.show()
We can construct a boolean mask to exclude any data which does not have all-clear observations, and compare the time series for the point location against the unmasked data.
(If you want to include more QA values in the masking, re-use the code from the plotting helper function defined at the top of this notebook in combination with this example.)
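For example, here is a minimal sketch of keeping multiple QA values with xarray’s `isin` and `where`; the keep list simply re-uses the values we saw earlier, and this cell is illustrative rather than part of the original workflow:

# keep pixels whose QA value is in the list; every other pixel becomes NaN
keep_mask = qa_ds_clip.isin([0.0, 1.0, 4.0])
hdst_ds_clip_kept = hdst_ds_clip.where(keep_mask)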
# build a binary mask: 1. where the QA is clear (0), 0. where any input was flagged
qa_ds_clip_masked = xr.where(qa_ds_clip > 0, 0., 1.)
# set the HDST data to NaN wherever the mask is 0.
hdst_ds_clip_masked = xr.where(qa_ds_clip_masked == 0., np.nan, hdst_ds_clip)
ax = hdst_ds_clip_masked.plot(x='x', y='y', col='time', col_wrap=5, cmap='inferno')
ax.set_xlabels('Easting [m]')
ax.set_ylabels('Northing [m]')
ax.map(lambda: plt.plot(set_x, set_y, markersize=20, marker=".", color="c"))
plt.show()
Now we can plot the time series for both datasets, with and without QA masking. Most of the data points will be exactly the same, so for display purposes we will artificially add 1 K to the QA-masked time series.
fig, ax = plt.subplots(figsize=(12,7))
hdst_ds_clip.isel(band=0).sel(x=set_x, y=set_y, method='nearest', tolerance=20).plot(ax=ax, marker='o', c='m', label='not QA masked')
# artificially offset for display
(hdst_ds_clip_masked+1).isel(band=0).sel(x=set_x, y=set_y, method='nearest', tolerance=20).plot(ax=ax, marker='o', c='c', label='QA masked')
plt.title(f'time series for {pixtype} pixel')
plt.grid(True)
plt.legend()
plt.ylabel('Fused LST [K]')
plt.xticks(rotation=45)
plt.show()