from .boolean_utils import multiply, matmul, dot, power, ismat
from .sparse_utils import to_dense, to_triplet, to_sparse
from scipy.sparse import spmatrix, issparse, csr_matrix
import numpy as np
from sklearn.metrics import recall_score, precision_score, accuracy_score, f1_score
[docs]
def get_metrics(gt, pd, metrics, axis=None):
    '''Evaluate several metrics in one call.

    Metrics from ``sklearn.metrics`` are included as a sanity check. Their
    input must be binary arrays, so both inputs are converted to flattened
    dense arrays for them; this makes them slow and less flexible, and
    ``axis`` is not applied to them.

    Parameters
    ----------
    gt : array, spmatrix
        Ground truth, can be 1d array, 2d dense or sparse matrix.
    pd : array, spmatrix
        Prediction, can be 1d array, 2d dense or sparse matrix.
        When the inputs are matrices, row- and column-wise measurement can be
        conducted by defining `axis`.
    metrics : list of str
        The names of the metrics to evaluate.
    axis : int in {0, 1}, optional
        When `axis` == 0, the result containing the column-wise measurement
        has the same length as the number of columns.

    Returns
    -------
    results : list
        One entry per requested name, in the same order. A name that matches
        no known metric yields ``None``.

    Raises
    ------
    TypeError
        If the prediction contains NaN.
    '''
    if np.isnan(to_dense(pd, squeeze=True)).any():
        raise TypeError("NaN is found in prediction.")

    functions = {
        'TP': TP, 'FP': FP, 'TN': TN, 'FN': FN,
        'TPR': TPR, 'FPR': FPR, 'TNR': TNR, 'FNR': FNR,
        'PPV': PPV, 'ACC': ACC, 'ERR': ERR, 'F1': F1,
        'Recall': TPR, 'Precision': PPV, 'Accuracy': ACC, 'Error': ERR,  # alias
        'RMSE': RMSE, 'MAE': MAE,  # real distances
    }
    sklearn_metrics = {
        'recall_score': recall_score, 'precision_score': precision_score,
        'accuracy_score': accuracy_score, 'f1_score': f1_score,
    }

    # Flattened dense copies used only by the sklearn metrics. They are kept
    # in separate names (and built at most once) so that `gt` / `pd` keep
    # their original shape for metrics evaluated later in the same call.
    flat_gt = flat_pd = None

    results = []
    for m in metrics:
        if m in functions:
            results.append(functions[m](gt, pd, axis))
        elif m in sklearn_metrics:  # must be binary arrays
            if flat_gt is None:
                flat_gt = to_dense(gt).flatten()
                flat_pd = to_dense(pd).flatten()
            results.append(sklearn_metrics[m](flat_gt, flat_pd))
        else:
            results.append(None)
    return results
[docs]
def TP(gt, pd, axis=None):
    '''True positives.

    Sums ``multiply(gt, pd, boolean=True)`` over `axis` and returns the
    result as a squeezed numpy array (0-d when a single value remains).
    '''
    overlap = multiply(gt, pd, boolean=True)
    total = overlap.sum(axis=axis)
    return np.array(total).squeeze()
[docs]
def FP(gt, pd, axis=None):
    '''False positives.

    Sums the positive part of ``pd - gt`` over `axis`. A sparse difference
    is reduced with the sparse ``maximum`` and returned as a squeezed numpy
    array; any other difference is reduced with ``np.maximum`` and the sum
    is returned as-is.
    '''
    excess = pd - gt
    if not issparse(excess):
        return np.maximum(excess, 0).sum(axis=axis)
    clipped = excess.maximum(0)
    return np.array(clipped.sum(axis=axis)).squeeze()
[docs]
def TN(gt, pd, axis=None):
    '''True negatives: the true positives of the inverted inputs.'''
    negated_gt = invert(gt)
    negated_pd = invert(pd)
    return TP(gt=negated_gt, pd=negated_pd, axis=axis)
[docs]
def FN(gt, pd, axis=None):
    '''False negatives: ``FP`` evaluated with the two inputs swapped.'''
    # FP sums the positive part of (second - first); passing the prediction
    # first and the ground truth second therefore sums the positive part
    # of (gt - pd).
    return FP(pd, gt, axis=axis)
[docs]
def TPR(gt, pd, axis=None):
    '''Sensitivity, recall, hit rate, or true positive rate.

    Computed as ``TP / gt.sum(axis)``. Entries whose denominator is not
    positive are reported as 0.

    Parameters
    ----------
    gt, pd : array, spmatrix
        Ground truth and prediction.
    axis : int in {0, 1}, optional
        Axis to sum over; ``None`` gives a single overall rate.

    Returns
    -------
    float scalar when a single value remains, otherwise a 1d float array.
    '''
    positives = np.asarray(gt.sum(axis=axis), dtype=float).squeeze()
    hits = np.asarray(TP(gt, pd, axis=axis), dtype=float)
    # Element-wise guarded division: a plain `if denom > 0` is ambiguous
    # (raises ValueError) once `axis` makes the denominator an array.
    rate = np.divide(hits, positives,
                     out=np.zeros_like(positives), where=positives > 0)
    # `[()]` turns a 0-d array into a scalar and leaves n-d arrays as they are.
    return rate[()]
[docs]
def TNR(gt, pd, axis=None):
    '''Specificity, selectivity or true negative rate.

    Computed as ``TN / invert(gt).sum(axis)``. Entries whose denominator is
    not positive are reported as 0.

    Parameters
    ----------
    gt, pd : array, spmatrix
        Ground truth and prediction.
    axis : int in {0, 1}, optional
        Axis to sum over; ``None`` gives a single overall rate.

    Returns
    -------
    float scalar when a single value remains, otherwise a 1d float array.
    '''
    negatives = np.asarray(invert(gt).sum(axis=axis), dtype=float).squeeze()
    rejections = np.asarray(TN(gt, pd, axis=axis), dtype=float)
    # Element-wise guarded division: a plain `if denom > 0` is ambiguous
    # (raises ValueError) once `axis` makes the denominator an array.
    rate = np.divide(rejections, negatives,
                     out=np.zeros_like(negatives), where=negatives > 0)
    # `[()]` turns a 0-d array into a scalar and leaves n-d arrays as they are.
    return rate[()]
[docs]
def FPR(gt, pd, axis=None):
    '''Fall-out or false positive rate, i.e. the complement of ``TNR``.'''
    specificity = TNR(gt, pd, axis=axis)
    return 1 - specificity
[docs]
def FNR(gt, pd, axis=None):
    '''Miss rate or false negative rate, i.e. the complement of ``TPR``.'''
    sensitivity = TPR(gt, pd, axis=axis)
    return 1 - sensitivity
[docs]
def PPV(gt, pd, axis=None):
    '''Precision or positive predictive value.

    Computed as ``TP / pd.sum(axis)``. Entries whose denominator is not
    positive are reported as 0.

    Parameters
    ----------
    gt, pd : array, spmatrix
        Ground truth and prediction.
    axis : int in {0, 1}, optional
        Axis to sum over; ``None`` gives a single overall value.

    Returns
    -------
    float scalar when a single value remains, otherwise a 1d float array.
    '''
    predicted = np.asarray(pd.sum(axis=axis), dtype=float).squeeze()
    hits = np.asarray(TP(gt, pd, axis=axis), dtype=float)
    # Element-wise guarded division: a plain `if denom > 0` is ambiguous
    # (raises ValueError) once `axis` makes the denominator an array.
    value = np.divide(hits, predicted,
                      out=np.zeros_like(predicted), where=predicted > 0)
    # `[()]` turns a 0-d array into a scalar and leaves n-d arrays as they are.
    return value[()]
[docs]
def ACC(gt, pd, axis=None):
    '''Accuracy: ``(TP + TN)`` divided by the number of compared entries.

    For 2d input the divisor is the total number of cells when `axis` is
    None, otherwise ``pd.shape[axis]``. For any other input it is
    ``len(pd)``.
    '''
    if len(pd.shape) != 2:
        n = len(pd)
    elif axis is None:
        n = pd.shape[0] * pd.shape[1]
    else:
        n = pd.shape[axis]
    correct = TP(gt, pd, axis) + TN(gt, pd, axis)
    return correct / n
[docs]
def ERR(gt, pd, axis=None):
    '''Error rate, i.e. the complement of ``ACC``.'''
    accuracy = ACC(gt, pd, axis)
    return 1 - accuracy
[docs]
def F1(gt, pd, axis=None):
    '''F1 score.

    Harmonic mean of precision (``PPV``) and recall (``TPR``):
    ``2 * precision * recall / (precision + recall)``. Entries where
    ``precision + recall`` is not positive are reported as 0.

    Parameters
    ----------
    gt, pd : array, spmatrix
        Ground truth and prediction.
    axis : int in {0, 1}, optional
        Forwarded to ``PPV`` and ``TPR``.

    Returns
    -------
    float scalar when a single value remains, otherwise a float array.
    '''
    precision = np.asarray(PPV(gt, pd, axis), dtype=float)
    recall = np.asarray(TPR(gt, pd, axis), dtype=float)
    total = np.asarray(precision + recall)
    # Element-wise guarded division: a plain `if denom > 0` is ambiguous
    # (raises ValueError) when precision and recall are arrays.
    score = np.divide(2 * precision * recall, total,
                      out=np.zeros_like(total), where=total > 0)
    # `[()]` turns a 0-d array into a scalar and leaves n-d arrays as they are.
    return score[()]
def _get_size(X, axis=None):
if axis is not None:
return X.shape[axis]
else:
return X.shape[0] * X.shape[1] if len(X.shape) == 2 else len(X)
[docs]
def RMSE(gt, pd, axis=None):
    '''Root mean squared error between `gt` and `pd` along `axis`.

    The squared differences are summed over `axis`, divided by the entry
    count from ``_get_size(gt, axis)``, and the square root is taken.
    '''
    count = _get_size(gt, axis=axis)
    squared_error = power(gt - pd, 2)
    mean_squared = squared_error.sum(axis) / count
    return np.sqrt(mean_squared)
[docs]
def MAE(gt, pd, axis=None):
    '''Mean absolute error between `gt` and `pd` along `axis`.

    The absolute differences are summed over `axis` and divided by the
    entry count from ``_get_size(gt, axis)``.
    '''
    count = _get_size(gt, axis=axis)
    absolute_error = np.abs(gt - pd)
    return absolute_error.sum(axis) / count
[docs]
def invert(X):
    '''Return the element-wise complement ``1 - X``.

    Parameters
    ----------
    X : spmatrix or numpy.ndarray
        Matrix or array to invert.

    Returns
    -------
    A sparse matrix for sparse input (an all-ones CSR matrix minus `X`),
    otherwise a numpy array.

    Raises
    ------
    TypeError
        If `X` is neither a scipy sparse matrix nor a numpy array.
    '''
    if issparse(X):
        # Note: this materialises a dense all-ones array of X's shape first.
        return csr_matrix(np.ones(X.shape)) - X
    if isinstance(X, np.ndarray):
        return 1 - X
    raise TypeError(
        "invert() expects a scipy sparse matrix or numpy.ndarray, got {}".format(
            type(X).__name__))
[docs]
def description_length(gt, U, V, pd=None, w_model=1.0, w_fp=1.0, w_fn=1.0):
    '''The vanilla description length function.

    Weighted sum of the model size (``U.sum() + V.sum()``), the false
    positives and the false negatives of `pd` against `gt`. When `pd` is
    None it is computed from `U` and `V` via
    ``matmul(U, V.T, sparse=True, boolean=True)``.
    '''
    if pd is None:
        pd = matmul(U, V.T, sparse=True, boolean=True)
    model_cost = w_model * (U.sum() + V.sum())
    fp_cost = w_fp * FP(gt, pd)
    fn_cost = w_fn * FN(gt, pd)
    return model_cost + fp_cost + fn_cost
[docs]
def weighted_error(gt, pd, w_fp=0.5, w_fn=None, axis=None):
    '''Coverage cost function to be minimized.

    Returns ``w_fp * FP + w_fn * FN``. When `w_fn` is None it defaults to
    ``1 - w_fp``.
    '''
    if w_fn is None:
        w_fn = 1 - w_fp
    fp_term = w_fp * FP(gt, pd, axis=axis)
    fn_term = w_fn * FN(gt, pd, axis=axis)
    return fp_term + fn_term
[docs]
def coverage_score(gt, pd, w_fp=0.5, w_fn=None, axis=None):
    '''Coverage score function to be maximized.

    Measure the coverage of X using Y. Returns ``-w_fp * FP + w_fn * TP``.
    When `w_fn` is None it defaults to ``1 - w_fp``.

    Parameters
    ----------
    axis : int in {0, 1}, default: None
        The dimension to which the basis belongs.
        When `axis` is None, return the overall coverage score.
        When `axis` is 0, the basis is at dimension 0, thus return the
        column-wise coverage scores.
    '''
    if w_fn is None:
        w_fn = 1 - w_fp
    penalty = -w_fp * FP(gt, pd, axis=axis)
    reward = w_fn * TP(gt, pd, axis=axis)
    # Described in the original source as "P - weighted_error()".
    return penalty + reward