import numpy as np
import pandas as pd
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import r2_score
from math import sqrt
from scipy import stats

def eval_metrics(x, y, round=3, all_regress=False):
    """
    Calculate a set of common statistical metrics based on two input
    actual and predicted vectors.

    These include:

    - Pearson correlation
    - Root Mean Squared Error
    - Mean Absolute Error
    - R-squared
    - Bias
    - Linear regression parameters (slope, p-value, intercept,
      standard error)

    Pairs in which either value is missing (NaN) are dropped before any
    metric is computed.

    Parameters
    ----------
    x : numpy.array
        An array providing "actual" variable values
    y : numpy.array
        An array providing "predicted" variable values
    round : int
        Number of decimal places to round each metric to.
        Defaults to 3
    all_regress : bool
        Whether to return linear regression p-value, intercept and
        standard error (in addition to only regression slope).
        Defaults to False

    Returns
    -------
    A pandas.Series containing calculated metrics

    Raises
    ------
    ValueError
        If no pair of values remains once pairs containing NaN have
        been dropped.
    """
    # NOTE: the parameter name ``round`` shadows the builtin inside this
    # function; it is kept unchanged for backward compatibility and the
    # builtin is not used below.

    # Pair the inputs in a dataframe so that a missing value in either
    # vector removes the whole pair
    xy_df = pd.DataFrame({"x": x, "y": y}).dropna()

    # Fail early with a clear message instead of an error raised from
    # inside the regression / metric routines
    if xy_df.empty:
        raise ValueError(
            "eval_metrics requires at least one pair of non-NaN values "
            "in x and y"
        )

    actual = xy_df["x"]
    predicted = xy_df["y"]

    # Compute linear regression of predicted (y) on actual (x)
    lin_reg = stats.linregress(x=actual, y=predicted)

    # Calculate statistics; "actual" is passed first (as the reference
    # values) to every error metric
    stats_dict = {
        "Correlation": xy_df.corr().iloc[0, 1],
        "RMSE": sqrt(mean_squared_error(actual, predicted)),
        "MAE": mean_absolute_error(actual, predicted),
        "R-squared": r2_score(actual, predicted),
        # Positive bias means predictions are larger than actuals on average
        "Bias": (predicted - actual).mean(),
        "Regression slope": lin_reg.slope,
    }

    # Additional regression params
    if all_regress:
        stats_dict.update(
            {
                "Regression p-value": lin_reg.pvalue,
                "Regression intercept": lin_reg.intercept,
                "Regression standard error": lin_reg.stderr,
            }
        )

    # Return as a rounded pandas.Series
    return pd.Series(stats_dict).round(round)