import numpy as np
import time
import numbers
from scipy.sparse import isspmatrix, lil_matrix
from .sparse_utils import sparse_indexing
from scipy.sparse import spmatrix
from .sparse_utils import to_sparse
from .boolean_utils import multiply, matmul
from .decorator_utils import ignore_warnings
[docs]
def get_rng(seed=None, rng=None):
    '''Get random number generator.

    Both arguments are optional; ``rng`` takes precedence over ``seed``.

    Parameters
    ----------
    seed : int, optional
        Random seed. Used only when ``rng`` is not a ``RandomState``.
        If it is not an integer (e.g. ``None``), the current Unix time
        in whole seconds is used as the seed instead.
    rng : numpy.random.RandomState, optional
        Random number generator. Returned unchanged when it is a
        ``numpy.random.RandomState`` instance; any other value is ignored.

    Returns
    -------
    rng : numpy.random.RandomState
        The supplied generator, or a new one built from the seed.
    '''
    if isinstance(rng, np.random.RandomState):
        print("[I] Using RandomState.")
        return rng

    if not isinstance(seed, (numbers.Integral, np.integer)):
        # No usable seed was supplied: fall back to a clock-based one so
        # the printed value can still be used to reproduce the run.
        seed = int(time.time())

    print("[I] Using seed :", seed)
    return np.random.RandomState(seed)
[docs]
def safe_indexing(X, indices):
    '''Select items or rows of ``X`` along the first axis.

    Works for plain sequences as well as objects exposing ``shape``.
    Modified from https://github.com/scikit-learn/scikit-learn/blob/master/sklearn/utils/__init__.py

    Parameters
    ----------
    X : array-like, sparse-matrix, list, pandas.DataFrame, pandas.Series.
        Data from which to sample rows or items.
    indices : array-like of int
        Indices according to which X will be subsampled.

    Returns
    -------
    subset
        Subset of X on first axis. A list when ``X`` has no ``shape``
        attribute.
    '''
    # Objects without a shape (e.g. lists) are indexed item by item.
    if not hasattr(X, "shape"):
        return [X[position] for position in indices]

    # ``take`` is often substantially faster than X[indices]; it is only
    # used when the indices carry a signed-integer dtype.
    if hasattr(X, 'take') and (hasattr(indices, 'dtype') and indices.dtype.kind == 'i'):
        return X.take(indices, axis=0)

    # Sparse matrices are delegated to the project's sparse indexing helper.
    if isspmatrix(X):
        return sparse_indexing(X, indices=indices)

    return X[indices]
[docs]
def binarize(X, threshold=0.5):
    '''Binarize a matrix (Heaviside step function).

    Entries strictly greater than ``threshold`` become 1, all others 0.

    Parameters
    ----------
    X : float ndarray, spmatrix
    threshold : float, default: 0.5

    Returns
    -------
    result : int ndarray, spmatrix
        For ``spmatrix`` input the result is passed through ``to_sparse``
        with the input's ``format``.
    '''
    above = X > threshold
    Y = above.astype(int)

    if not isinstance(X, spmatrix):
        return Y

    # Hand the result to to_sparse together with the input's storage format.
    return to_sparse(Y, type=X.format)
@ignore_warnings
def sigmoid(X):
    '''Sigmoid function.

    Evaluated piecewise by sign so that ``np.exp`` is only ever applied to
    non-positive arguments:

    * ``x >= 0``: ``1 / (1 + exp(-x))``
    * ``x < 0``: ``exp(x) / (1 + exp(x))``

    The input is cast to ``float64`` via ``X.astype``. Entries that are
    neither ``>= 0`` nor ``< 0`` (i.e. NaN) are left at 0 in the output.

    Parameters
    ----------
    X : ndarray

    Returns
    -------
    Y : float ndarray
        Same shape as ``X``.
    '''
    values = X.astype(np.float64)
    result = np.zeros(values.shape)

    non_negative = values >= 0
    negative = values < 0

    result[non_negative] = 1.0 / (1.0 + np.exp(-values[non_negative]))

    exp_negative = np.exp(values[negative])
    result[negative] = exp_negative / (1 + exp_negative)
    return result
[docs]
def d_sigmoid(X):
    '''Derivative of the sigmoid function.

    Computes ``s = sigmoid(X)`` and combines ``s`` with ``1 - s`` through
    the project's ``multiply`` helper (presumably an element-wise product,
    giving ``s * (1 - s)`` -- confirm against ``boolean_utils.multiply``).

    Parameters
    ----------
    X : ndarray

    Returns
    -------
    Z
        Result of ``multiply(s, 1 - s)``.
    '''
    activation = sigmoid(X)
    complement = 1 - activation
    return multiply(activation, complement)
[docs]
def get_prediction(U, V, boolean=True, sparse=True):
    '''Get prediction.

    Multiplies ``U`` by the transpose of ``V`` using the project's
    ``matmul`` helper.

    Parameters
    ----------
    U, V : array, spmatrix
        Factor matrices; ``V`` is transposed before the product.
    boolean : bool
        Whether to apply Boolean multiplication. Forwarded to ``matmul``.
    sparse : bool
        Forwarded unchanged to ``matmul`` (presumably selects a sparse
        result -- confirm against ``boolean_utils.matmul``).

    Returns
    -------
    Result of ``matmul(U, V.T, boolean=boolean, sparse=sparse)``.
    '''
    V_transposed = V.T
    return matmul(U, V_transposed, boolean=boolean, sparse=sparse)
[docs]
def _threshold_factor(F, shared, individual):
    '''Binarize factor matrix ``F``, per column if ``individual`` is given.

    ``individual`` takes precedence over ``shared``; when both are ``None``
    the matrix is returned untouched. Per-column results are written back
    into ``F`` in place, so callers should pass a copy.
    '''
    if individual is not None:
        assert len(individual) == F.shape[1]
        for col in range(F.shape[1]):
            F[:, col] = binarize(F[:, col], individual[col])
        return F

    if shared is not None:
        return binarize(F, shared)

    return F


def get_prediction_with_threshold(U, V, u=None, v=None, us=None, vs=None, sparse=True):
    '''Get prediction after thresholding factors U and V.

    Each factor matrix is copied first, so the caller's ``U`` and ``V`` are
    not rebound or written to by this function. For each matrix, the
    per-factor thresholds (``us`` / ``vs``) win over the shared threshold
    (``u`` / ``v``); if neither is given that matrix is used as-is.

    Parameters
    ----------
    U : ndarray, spmatrix
        The factor matrix.
    V : ndarray, spmatrix
        The factor matrix.
    u : float
        The shared threshold across all factors for ``U``.
    v : float
        The shared threshold across all factors for ``V``.
    us : list of k floats
        The individual thresholds for each factor in ``U``.
        Its length must equal ``U.shape[1]``.
    vs : list of k floats
        The individual thresholds for each factor in ``V``.
        Its length must equal ``V.shape[1]``.
    sparse : bool
        Forwarded unchanged to ``matmul``.

    Returns
    -------
    X_pd : ndarray, spmatrix
        The prediction matrix, ``matmul(U, V.T, boolean=True, sparse=sparse)``
        on the thresholded factors.
    '''
    U, V = U.copy(), V.copy()

    U = _threshold_factor(U, u, us)
    V = _threshold_factor(V, v, vs)

    return matmul(U, V.T, boolean=True, sparse=sparse)
[docs]
def get_residual(X, U, V):
    '''Get residual matrix of X.

    The Boolean prediction of ``U`` and ``V`` is computed with
    ``get_prediction``; every entry of ``X`` where that prediction is
    truthy is set to 0. ``X`` itself is copied first.

    Parameters
    ----------
    X : array, spmatrix
        Data matrix.
    U, V : array, spmatrix
        Factor matrices passed to ``get_prediction``.

    Returns
    -------
    residual : scipy.sparse.lil_matrix
        Copy of ``X`` with the predicted entries zeroed out.
    '''
    pattern = get_prediction(U, V, boolean=True)

    residual = lil_matrix(X.copy())
    covered = pattern.astype(bool)
    residual[covered] = 0
    return residual
[docs]
def to_interval(X, min, max):
    '''Transform data into interval [min, max].

    Applies the affine map that sends ``X.min()`` to ``min`` and
    ``X.max()`` to ``max``.

    Parameters
    ----------
    X : ndarray
    min : float
        Lower bound of the target interval.
    max : float
        Upper bound of the target interval.

    Returns
    -------
    result : ndarray
        Rescaled data with the same shape as ``X``. If all entries of ``X``
        are equal, the rescaling is undefined and a float64 array filled
        with ``min`` is returned instead of NaNs.

    TODO: to support spmatrix
    '''
    min_val = X.min()
    max_val = X.max()
    span = max_val - min_val

    if span == 0:
        # Constant input: dividing by the zero span would yield NaN for
        # every entry, so map everything to the lower bound.
        return np.full_like(X, min, dtype=np.float64)

    return (X - min_val) / span * (max - min) + min