Source code for PyBMF.models.GreConD

from .BaseModel import BaseModel
from scipy.sparse import lil_matrix
from ..utils import multiply, bool_to_index, ERR, get_prediction, show_matrix, get_residual, matmul, add
import numpy as np


[docs] class GreConD(BaseModel): '''The GreConD algorithm for exact Boolean decomposition. .. topic:: Reference Discovery of optimal factors in binary data via a novel method of matrix decomposition. Parameters ---------- k : int, optional The target rank. If ``None``, it will factorize until the error is smaller than `tol`, or when other stopping criteria is met. tol : float, default: 0 The error tolerance. ''' def __init__(self, k=None, tol=0): self.check_params(k=k, tol=tol)
[docs] def fit(self, X_train, X_val=None, X_test=None, **kwargs): '''Fit the model. ''' super().fit(X_train, X_val, X_test, **kwargs) self._fit() self.finish(show_logs=self.show_logs, save_model=self.save_model, show_result=self.show_result)
[docs] def _fit(self): '''The main process if fitting. ''' k = 0 is_factorizing = True self.X_rs = lil_matrix(self.X_train.copy()) while is_factorizing: score, u, v = get_concept(self.X_train, self.X_rs) if score == 0: is_factorizing = self.early_stop(msg="No pattern found", k=k) break # update factors self.set_factors(k, u=u, v=v) # evaluate self.X_pd = get_prediction(U=self.U, V=self.V, boolean=True) self.X_rs = get_residual(X=self.X_train, U=self.U, V=self.V) self.evaluate( df_name='updates', head_info={ 'k': k, 'score': score, 'shape': [u.sum(), v.sum()], }, ) # early stop detection error = ERR(gt=self.X_train, pd=self.X_pd) print("[I] k: {}, score: {}, error: {:.3f}, shape: [{}, {}]".format(k, score, error, u.sum(), v.sum())) is_factorizing = self.early_stop(error=error, n_factor=k+1, k=k) k += 1
[docs] def get_concept(X_gt, X_rs): '''Get a concept/pattern from the residual matrix. Parameters ---------- X_gt : spmatrix The ground-truth matrix. X_rs : spmatrix The residual matrix. Returns ------- score : float The TP coverage of the pattern over X_gt. u : (m, 1) spmatrix The factors. If the pattern is not found, they'll be zero vectors. v : (n, 1) spmatrix The factors. If the pattern is not found, they'll be zero vectors. ''' m, n = X_gt.shape score, best_score = 0, 0 u, best_u = None, lil_matrix(np.ones((m, 1))) v, best_v = None, lil_matrix((n, 1)) # column ids of residual matrix j_rs = bool_to_index(X_rs.sum(axis=0) > 0) n_iter = 0 is_improving = True while is_improving: last_score = best_score j_list = [j for j in j_rs if not best_v[j] == 1] for j in j_list: u = multiply(lil_matrix(X_gt[:, j]), best_u) if u.sum() * n > score: # check the score upper bound u_idx = bool_to_index(u) v_idx = bool_to_index(X_gt[u_idx, :].sum(axis=0) == u.sum()) v = lil_matrix((n, 1)) v[v_idx] = 1 if u.sum() * v.sum() > score: # check the score upper bound score = X_rs[u_idx][:, v_idx].sum() if score > best_score: best_score = score best_u = u best_v = v if last_score == best_score: is_improving = False n_iter += 1 return best_score, best_u, best_v