Source code for PyBMF.generators.BlockMatrixGenerator

import numpy as np
from .BaseGenerator import BaseGenerator
    

[docs] class BlockMatrixGenerator(BaseGenerator): '''The block Boolean matrix generator. This generation procedure produces factor matrices U and V with C1P (contiguous-1 property). The factors form a arbitrarily placed block matrix with or without overlapping. The matrix is sorted by nature upon generation. Parameters ---------- m : int The number of rows in X. n : int, optional The number of columns in X. k : int, optional The rank. overlap_flag : bool Whether overlap is allowed or not. size_range : list of 2 or 4 floats The lower and upper bounds of factor rectangle size (height_low, height_high, width_low, width_high), or just upper bounds (height_high, width_high). The real size limit is the bounds times size m, n divided by k. E.g., if `k` = 5 and the image height `m` = 1000, the lower and upper bounds are [0.2, 2.0] * 1000 / 5. ''' def __init__(self, m, n, k, overlap_flag=None, size_range=None): super().__init__() self.check_params(m=m, n=n, k=k, overlap_flag=overlap_flag, size_range=size_range)
[docs] def generate(self, seed=None): '''Generate a matrix. Parameters ---------- seed : int Random seed. ''' self.check_params(seed=seed) self.generate_factors() self.boolean_matmul() # self.sorted_index() # self.set_factor_info() self.to_sparse(type='csr')
[docs] def generate_factors(self): '''Generate factors. ''' # trials using two point sequences with proper overlapping while True: points_start_u, points_end_u = self.generate_factor_points(n=self.m, k=self.k, low=self.size_range[0], high=self.size_range[1]) points_start_v, points_end_v = self.generate_factor_points(n=self.n, k=self.k, low=self.size_range[2], high=self.size_range[3]) trials = 0 if self.check_overlap(self.k, points_start_u, points_end_u, points_start_v, points_end_v) == True: trials += 1 print("[I] check overlap trials: ", trials) self.U = self.generate_factor(n=self.m, k=self.k, points_start=points_start_u, points_end=points_end_u) self.V = self.generate_factor(n=self.n, k=self.k, points_start=points_start_v, points_end=points_end_v) break # try until a qualified config is found
[docs] def generate_factor_points(self, n, k, low, high): '''Generate a factor point sequence. Parameters ---------- n : int The number of points. k : int The rank. low : float The lower bound of the interval. high : float The upper bound of the interval. Returns ------- points_start : list of ints The start points. points_end : list of ints The end points. ''' # trials for a point sequence with proper intervals and do not exceed that dimension avg = n / k # average size points_end = np.zeros(k) trials = 0 while True: trials += 1 print("[I] generate factor trials: ", trials) points_start = self.rng.randint(0, n, size=k) for i in range(k): a = np.floor(points_start[i] + avg * low) b = np.ceil(points_start[i] + avg * high) b = b + 1 if a == b else b points_end[i] = self.rng.randint(low=a, high=b) if all([e <= n for e in points_end]): return points_start.astype(int), points_end.astype(int)
[docs] def check_overlap(self, k, points_start_u, points_end_u, points_start_v, points_end_v): '''Check overlap. Parameters ---------- k : int The rank. points_start_u : list of ints The start points of U. points_end_u : list of ints The end points of U. points_start_v : list of ints The start points of V. points_end_v : list of ints The end points of V. Returns ------- is_overlapped : bool ''' # build a list of rectangles rectangles = [[points_start_u[i], points_end_u[i], points_start_v[i], points_end_v[i]] for i in range(k)] for i in range(k): for j in range(k): if j == i: continue is_overlapped = self.is_overlapped(A=rectangles[i], B=rectangles[j]) if is_overlapped == 2: return False # fully overlap (included) is not allowed if is_overlapped == 1 and self.overlap_flag == False: return False # partial overlap is not allowed if overlap_flag is False return True
[docs] def is_overlapped(self, A, B): '''Check if two rectangles are overlapped. Parameters ---------- A : list of 4 ints The first rectangle. B : list of 4 ints The second rectangle. Returns ------- is_overlapped : int ''' u_overlapped = min(A[1], B[1]) > max(A[0], B[0]) v_overlapped = min(A[3], B[3]) > max(A[2], B[2]) u_included = A[1] <= B[1] and A[0] >= B[0] v_included = A[3] <= B[3] and A[2] >= B[2] if u_included and v_included: return 2 if u_overlapped and v_overlapped: return 1 else: return 0
[docs] def generate_factor(self, n, k, points_start, points_end): '''Generate a factor. Parameters ---------- n : int The number of rows in X. k : int The rank. points_start : list of ints The start points of X. points_end : list of ints The end points of X. ''' # build the C1P factor matrix X = np.zeros([n, k]) for c in range(k): X[points_start[c]:points_end[c], c] = 1 return X