Source code for dea_tools.validation

Tools for validating outputs and producing accuracy assessment metrics.

License: The code in this module is licensed under the Apache License,
Version 2.0 (https://www.apache.org/licenses/LICENSE-2.0). Digital Earth
Australia data is licensed under the Creative Commons by Attribution 4.0
license (https://creativecommons.org/licenses/by/4.0/).

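Contact: If you need assistance, please post a question on the Open Data
Cube Slack channel (http://slack.opendatacube.org/) or on the GIS Stack
Exchange (https://gis.stackexchange.com/questions/ask?tags=open-data-cube)
using the `open-data-cube` tag (you can view previously asked questions
here: https://gis.stackexchange.com/questions/tagged/open-data-cube).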

If you would like to report an issue with this script, you can file one
on GitHub (https://github.com/GeoscienceAustralia/dea-notebooks/issues/new).

Last modified: April 2023

import numpy as np
import pandas as pd
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
from math import sqrt
from scipy import stats

def eval_metrics(x, y, round=3, all_regress=False):
    """
    Summarise the agreement between "actual" and "predicted" values.

    The two inputs are paired up, any pair in which either value is
    missing is discarded, and the following metrics are computed on
    the remaining pairs:

    - Correlation (Pearson, the pandas default)
    - Root Mean Squared Error (RMSE)
    - Mean Absolute Error (MAE)
    - R-squared (square of the regression r-value)
    - Bias (mean of predicted minus actual)
    - Slope of the linear regression of predicted on actual

    Parameters
    ----------
    x : numpy.array
        The "actual" (reference) values.
    y : numpy.array
        The "predicted" values to be assessed against ``x``.
    round : int, optional
        Number of decimal places every metric is rounded to.
        Defaults to 3.
    all_regress : bool, optional
        If True, the regression p-value, intercept and standard
        error are appended after the regression slope. Defaults to
        False.

    Returns
    -------
    pandas.Series
        The rounded metrics, indexed by metric name.
    """

    # Tabulate the inputs side by side so that a missing value in
    # either column removes the whole pair
    paired = pd.DataFrame({"x": x, "y": y}).dropna()
    actual = paired["x"]
    predicted = paired["y"]

    # Fit predicted against actual; this is done before any other
    # metric, so unusable input fails here first
    fit = stats.linregress(x=actual, y=predicted)

    # Assemble metrics in the order they appear in the output Series
    metrics = {}
    metrics["Correlation"] = paired.corr().iloc[0, 1]
    metrics["RMSE"] = sqrt(mean_squared_error(actual, predicted))
    metrics["MAE"] = mean_absolute_error(actual, predicted)
    metrics["R-squared"] = fit.rvalue**2
    metrics["Bias"] = (predicted - actual).mean()
    metrics["Regression slope"] = fit.slope

    # Optional extra regression diagnostics
    if all_regress:
        metrics["Regression p-value"] = fit.pvalue
        metrics["Regression intercept"] = fit.intercept
        metrics["Regression standard error"] = fit.stderr

    # `round` here is the parameter (decimal places), not the builtin
    return pd.Series(metrics).round(round)