# Source code for PyBMF.utils.metrics

from .boolean_utils import multiply, matmul, dot, power, ismat
from .sparse_utils import to_dense, to_triplet, to_sparse
from scipy.sparse import spmatrix, issparse, csr_matrix
import numpy as np
from sklearn.metrics import recall_score, precision_score, accuracy_score, f1_score


def get_metrics(gt, pd, metrics, axis=None):
    '''Get results of the metrics all at once.

    Metrics from sklearn.metrics are included as sanity check.
    Their input must be binary `array`, which makes them slow and less flexible.

    Parameters
    ----------
    gt : array, spmatrix
        Ground truth, can be 1d array, 2d dense or sparse matrix.
    pd : array, spmatrix
        Prediction, can be 1d array, 2d dense or sparse matrix.
        When the input are matrices, row and column-wise measurement can be
        conducted by defining `axis`.
    metrics : list of str
        The name of metrics. Names found in neither the built-in table nor the
        sklearn table produce a ``None`` entry in the result.
    axis : int in {0, 1}
        When `axis` == 0, the `result` containing the column-wise measurement
        has the same length as columns. The sklearn metrics ignore `axis`:
        they are always evaluated on the flattened inputs.

    Returns
    -------
    results : list
        One entry per name in `metrics`, in the same order.

    Raises
    ------
    TypeError
        If the prediction contains NaN.
    '''
    if np.isnan(to_dense(pd, squeeze=True)).any():
        raise TypeError("NaN is found in prediction.")

    functions = {
        'TP': TP, 'FP': FP, 'TN': TN, 'FN': FN,
        'TPR': TPR, 'FPR': FPR, 'TNR': TNR, 'FNR': FNR,
        'PPV': PPV, 'ACC': ACC, 'ERR': ERR, 'F1': F1,
        'Recall': TPR, 'Precision': PPV, 'Accuracy': ACC, 'Error': ERR,  # alias
        'RMSE': RMSE, 'MAE': MAE,  # real distances
    }

    sklearn_metrics = {
        'recall_score': recall_score,
        'precision_score': precision_score,
        'accuracy_score': accuracy_score,
        'f1_score': f1_score,
    }

    results = []
    # Flattened dense copies are kept in separate names and built lazily.
    # Rebinding `gt` / `pd` here would silently change the inputs seen by
    # every metric that follows an sklearn one (losing `axis` and sparsity).
    gt_flat = pd_flat = None
    for m in metrics:
        if m in functions:
            results.append(functions[m](gt, pd, axis))
        elif m in sklearn_metrics:
            # must be binary arrays
            if gt_flat is None:
                gt_flat = to_dense(gt).flatten()
                pd_flat = to_dense(pd).flatten()
            results.append(sklearn_metrics[m](gt_flat, pd_flat))
        else:
            results.append(None)

    return results
def TP(gt, pd, axis=None):
    '''True positives.

    Sums the result of ``multiply(gt, pd, boolean=True)`` along `axis` and
    returns it as a squeezed numpy array.
    NOTE(review): `multiply` is presumably an element-wise Boolean AND --
    confirm against `boolean_utils`.

    Parameters
    ----------
    gt : array, spmatrix
        Ground truth.
    pd : array, spmatrix
        Prediction.
    axis : int in {0, 1}, default: None
        Axis to sum over; None sums over all entries.

    Returns
    -------
    numpy.ndarray
        Squeezed array of counts (0-d when `axis` is None).
    '''
    overlap = multiply(gt, pd, boolean=True)
    total = overlap.sum(axis=axis)
    return np.array(total).squeeze()
def FP(gt, pd, axis=None):
    '''False positives.

    Computes the sum of the positive part of ``pd - gt`` along `axis`. For
    binary inputs this counts the entries where the prediction is 1 and the
    ground truth is 0.

    Parameters
    ----------
    gt : array, spmatrix
        Ground truth.
    pd : array, spmatrix
        Prediction.
    axis : int in {0, 1}, default: None
        Axis to sum over; None sums over all entries.

    Returns
    -------
    numpy.ndarray or scalar
        Squeezed array when the difference is sparse, otherwise whatever the
        dense ``sum`` returns.
    '''
    def _signed(X):
        # Unsigned dtypes wrap around on subtraction (0 - 1 == 255 for uint8)
        # and numpy refuses to subtract boolean arrays, so such inputs are
        # widened to a signed integer type first. Other dtypes pass through.
        dtype = getattr(X, 'dtype', None)
        if dtype is not None and dtype.kind in 'bu':
            return X.astype(np.int64)
        return X

    diff = _signed(pd) - _signed(gt)
    if issparse(diff):
        s = diff.maximum(0).sum(axis=axis)
        return np.array(s).squeeze()
    else:
        s = np.maximum(diff, 0).sum(axis=axis)
        return s
def TN(gt, pd, axis=None):
    '''True negatives.

    Computed as the true positives of the inverted ground truth and the
    inverted prediction.

    Parameters
    ----------
    gt : array, spmatrix
        Ground truth.
    pd : array, spmatrix
        Prediction.
    axis : int in {0, 1}, default: None
        Axis forwarded to `TP`.
    '''
    neg_gt = invert(gt)
    neg_pd = invert(pd)
    return TP(gt=neg_gt, pd=neg_pd, axis=axis)
def FN(gt, pd, axis=None):
    '''False negatives.

    Obtained from `FP` with the roles of ground truth and prediction swapped.

    Parameters
    ----------
    gt : array, spmatrix
        Ground truth.
    pd : array, spmatrix
        Prediction.
    axis : int in {0, 1}, default: None
        Axis forwarded to `FP`.
    '''
    # A false negative of (gt, pd) is a false positive of (pd, gt).
    swapped = {'gt': pd, 'pd': gt}
    return FP(axis=axis, **swapped)
def TPR(gt, pd, axis=None):
    '''sensitivity, recall, hit rate, or true positive rate

    Parameters
    ----------
    gt : array, spmatrix
        Ground truth.
    pd : array, spmatrix
        Prediction.
    axis : int in {0, 1}, default: None
        Axis to measure along; None gives a single overall rate.

    Returns
    -------
    scalar or numpy.ndarray
        ``TP / sum(gt)``. Where the denominator is not positive the rate is 0.
        With `axis` set, an array with one rate per row/column is returned.
    '''
    # Sparse sums come back as np.matrix; normalise to a squeezed ndarray so
    # the shape lines up with the squeezed output of TP.
    denom = np.asarray(gt.sum(axis=axis)).squeeze()
    tp = np.asarray(TP(gt, pd, axis=axis))
    if denom.ndim == 0:
        return tp / denom if denom > 0 else 0
    # `denom > 0` is an array here, so it cannot be used in a plain `if`;
    # guard the division element-wise instead.
    rate = np.zeros(denom.shape, dtype=float)
    np.divide(tp, denom, out=rate, where=denom > 0)
    return rate
def TNR(gt, pd, axis=None):
    '''specificity, selectivity or true negative rate

    Parameters
    ----------
    gt : array, spmatrix
        Ground truth.
    pd : array, spmatrix
        Prediction.
    axis : int in {0, 1}, default: None
        Axis to measure along; None gives a single overall rate.

    Returns
    -------
    scalar or numpy.ndarray
        ``TN / sum(invert(gt))``. Where the denominator is not positive the
        rate is 0. With `axis` set, an array with one rate per row/column is
        returned.
    '''
    # Sparse sums come back as np.matrix; normalise to a squeezed ndarray so
    # the shape lines up with the squeezed output of TN.
    denom = np.asarray(invert(gt).sum(axis=axis)).squeeze()
    tn = np.asarray(TN(gt, pd, axis=axis))
    if denom.ndim == 0:
        return tn / denom if denom > 0 else 0
    # `denom > 0` is an array here, so it cannot be used in a plain `if`;
    # guard the division element-wise instead.
    rate = np.zeros(denom.shape, dtype=float)
    np.divide(tn, denom, out=rate, where=denom > 0)
    return rate
def FPR(gt, pd, axis=None):
    '''fall-out or false positive rate

    Defined as the complement of the true negative rate.

    Parameters
    ----------
    gt : array, spmatrix
        Ground truth.
    pd : array, spmatrix
        Prediction.
    axis : int in {0, 1}, default: None
        Axis forwarded to `TNR`.
    '''
    specificity = TNR(gt, pd, axis=axis)
    return 1 - specificity
def FNR(gt, pd, axis=None):
    '''miss rate or false negative rate

    Defined as the complement of the true positive rate.

    Parameters
    ----------
    gt : array, spmatrix
        Ground truth.
    pd : array, spmatrix
        Prediction.
    axis : int in {0, 1}, default: None
        Axis forwarded to `TPR`.
    '''
    sensitivity = TPR(gt, pd, axis=axis)
    return 1 - sensitivity
def PPV(gt, pd, axis=None):
    '''precision or positive predictive value

    Parameters
    ----------
    gt : array, spmatrix
        Ground truth.
    pd : array, spmatrix
        Prediction.
    axis : int in {0, 1}, default: None
        Axis to measure along; None gives a single overall value.

    Returns
    -------
    scalar or numpy.ndarray
        ``TP / sum(pd)``. Where the denominator is not positive the value is
        0. With `axis` set, an array with one value per row/column is returned.
    '''
    # Sparse sums come back as np.matrix; normalise to a squeezed ndarray so
    # the shape lines up with the squeezed output of TP.
    denom = np.asarray(pd.sum(axis=axis)).squeeze()
    tp = np.asarray(TP(gt, pd, axis=axis))
    if denom.ndim == 0:
        return tp / denom if denom > 0 else 0
    # `denom > 0` is an array here, so it cannot be used in a plain `if`;
    # guard the division element-wise instead.
    value = np.zeros(denom.shape, dtype=float)
    np.divide(tp, denom, out=value, where=denom > 0)
    return value
def ACC(gt, pd, axis=None):
    '''Accuracy.

    The number of agreeing entries (TP + TN) divided by the number of entries
    that were measured.

    Parameters
    ----------
    gt : array, spmatrix
        Ground truth.
    pd : array, spmatrix
        Prediction; its shape determines the normaliser.
    axis : int in {0, 1}, default: None
        Axis to measure along; None measures over all entries.
    '''
    shape = pd.shape
    if len(shape) != 2:
        n = len(pd)
    elif axis is None:
        n = shape[0] * shape[1]
    else:
        # Summing along `axis` aggregates `shape[axis]` entries per result.
        n = shape[axis]
    correct = TP(gt, pd, axis) + TN(gt, pd, axis)
    return correct / n
def ERR(gt, pd, axis=None):
    '''Error rate.

    Defined as the complement of the accuracy.

    Parameters
    ----------
    gt : array, spmatrix
        Ground truth.
    pd : array, spmatrix
        Prediction.
    axis : int in {0, 1}, default: None
        Axis forwarded to `ACC`.
    '''
    accuracy = ACC(gt, pd, axis)
    return 1 - accuracy
def F1(gt, pd, axis=None):
    '''F1 score.

    Harmonic mean of precision (`PPV`) and recall (`TPR`). For binary inputs
    this is the same quantity as ``2 * tp / (2 * tp + fp + fn)``.

    Parameters
    ----------
    gt : array, spmatrix
        Ground truth.
    pd : array, spmatrix
        Prediction.
    axis : int in {0, 1}, default: None
        Axis forwarded to `PPV` and `TPR`.

    Returns
    -------
    scalar or numpy.ndarray
        The F1 score; 0 wherever ``precision + recall`` is not positive.
    '''
    precision = np.asarray(PPV(gt, pd, axis))
    recall = np.asarray(TPR(gt, pd, axis))
    denom = precision + recall
    if denom.ndim == 0:
        return 2 * precision * recall / denom if denom > 0 else 0
    # Array-valued precision/recall cannot go through a plain `if`; guard the
    # division element-wise instead.
    score = np.zeros(denom.shape, dtype=float)
    np.divide(2 * precision * recall, denom, out=score, where=denom > 0)
    return score
def _get_size(X, axis=None):
    '''Return the number of entries aggregated when reducing `X` along `axis`.

    Parameters
    ----------
    X : array, spmatrix
        Object exposing ``shape`` (and ``len`` when it is not 2-dimensional).
    axis : int, default: None
        When given, the length of that dimension is returned. When None, the
        total number of entries is returned.
    '''
    if axis is not None:
        return X.shape[axis]
    if len(X.shape) == 2:
        return X.shape[0] * X.shape[1]
    return len(X)
def RMSE(gt, pd, axis=None):
    '''Root mean squared error between ground truth and prediction.

    Parameters
    ----------
    gt : array, spmatrix
        Ground truth; also determines the normaliser via `_get_size`.
    pd : array, spmatrix
        Prediction.
    axis : int in {0, 1}, default: None
        Axis to aggregate over; None aggregates over all entries.
    '''
    count = _get_size(gt, axis=axis)
    squared = power(gt - pd, 2)
    return np.sqrt(squared.sum(axis) / count)
def MAE(gt, pd, axis=None):
    '''Mean absolute error between ground truth and prediction.

    Parameters
    ----------
    gt : array, spmatrix
        Ground truth; also determines the normaliser via `_get_size`.
    pd : array, spmatrix
        Prediction.
    axis : int in {0, 1}, default: None
        Axis to aggregate over; None aggregates over all entries.
    '''
    count = _get_size(gt, axis=axis)
    absolute = np.abs(gt - pd)
    return absolute.sum(axis) / count
def invert(X):
    '''Return the complement ``1 - X`` of a matrix or array.

    Parameters
    ----------
    X : spmatrix or numpy.ndarray
        Input to invert.

    Returns
    -------
    spmatrix or numpy.ndarray
        For sparse input, an all-ones CSR matrix of the same shape minus `X`;
        for ndarray input, ``1 - X``.

    Raises
    ------
    TypeError
        If `X` is neither a scipy sparse matrix nor a numpy array.
    '''
    if issparse(X):
        # NOTE: this materialises a dense all-ones array of X's shape.
        X = csr_matrix(np.ones(X.shape)) - X
    elif isinstance(X, np.ndarray):
        X = 1 - X
    else:
        raise TypeError(
            "invert() expects a scipy sparse matrix or numpy.ndarray, got "
            + type(X).__name__
        )
    return X
def description_length(gt, U, V, pd=None, w_model=1.0, w_fp=1.0, w_fn=1.0):
    '''The vanilla description length function.

    Weighted sum of the model size (``U.sum() + V.sum()``), the false
    positives and the false negatives of the prediction.

    Parameters
    ----------
    gt : array, spmatrix
        Ground truth.
    U, V : array, spmatrix
        Factor matrices; the prediction is built from ``U`` and ``V.T``.
    pd : array, spmatrix, default: None
        Prediction. When None it is computed as
        ``matmul(U, V.T, sparse=True, boolean=True)``.
    w_model, w_fp, w_fn : float
        Weights of the model size, false positives and false negatives.
    '''
    if pd is None:
        pd = matmul(U, V.T, sparse=True, boolean=True)
    model_cost = w_model * (U.sum() + V.sum())
    fp_cost = w_fp * FP(gt, pd)
    fn_cost = w_fn * FN(gt, pd)
    return model_cost + fp_cost + fn_cost
def weighted_error(gt, pd, w_fp=0.5, w_fn=None, axis=None):
    '''Coverage cost function to be minimized.

    Weighted sum of false positives and false negatives.

    Parameters
    ----------
    gt : array, spmatrix
        Ground truth.
    pd : array, spmatrix
        Prediction.
    w_fp : float, default: 0.5
        Weight of the false positives.
    w_fn : float, default: None
        Weight of the false negatives; ``1 - w_fp`` when None.
    axis : int in {0, 1}, default: None
        Axis forwarded to `FP` and `FN`.
    '''
    if w_fn is None:
        w_fn = 1 - w_fp
    fp_term = w_fp * FP(gt, pd, axis=axis)
    fn_term = w_fn * FN(gt, pd, axis=axis)
    return fp_term + fn_term
def coverage_score(gt, pd, w_fp=0.5, w_fn=None, axis=None):
    '''Coverage score function to be maximized.

    Measure the coverage of X using Y: weighted true positives minus weighted
    false positives.

    Parameters
    ----------
    gt : array, spmatrix
        Ground truth.
    pd : array, spmatrix
        Prediction.
    w_fp : float, default: 0.5
        Weight of the false positives (penalty).
    w_fn : float, default: None
        Weight applied to the true positives; ``1 - w_fp`` when None.
    axis : int in {0, 1}, default: None
        The dimension to which the basis belongs.
        When `axis` is None, return the overall coverage score.
        When `axis` is 0, the basis is at dimension 0, thus return the
        column-wise coverage scores.

    Notes
    -----
    For binary inputs TP = P - FN (P being the number of positives in `gt`),
    so the score equals ``w_fn * P - weighted_error(...)``.
    '''
    if w_fn is None:
        w_fn = 1 - w_fp
    penalty = -w_fp * FP(gt, pd, axis=axis)
    reward = w_fn * TP(gt, pd, axis=axis)
    return penalty + reward