Source code for constrainedmf.nmf.utils

import torch
import numpy as np
import scipy.stats as st


def normalize(x: torch.Tensor, axis=0) -> torch.Tensor:
    return x / x.sum(axis, keepdim=True)
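

# Illustrative usage sketch (not part of the original module): ``normalize``
# rescales a tensor so that slices along ``axis`` sum to 1. The variable names
# below are hypothetical.
#
# >>> w = torch.rand(5, 3)
# >>> w_norm = normalize(w, axis=0)  # each column now sums to 1
# >>> torch.allclose(w_norm.sum(0), torch.ones(3))
# True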


def scalar_to_vec(x, k, dist="unif", nsig=3):
    """
    Helper function to generate 1d kernel distributions that integrate
      to a specified scalar. Used in generalizing NMF initialization
      weights to NMFD.
    ------------
    Parameters
    ------------
    x: float to which the 1d kernel should integrate
    k: integer width of the 1d kernel
    dist: probability distribution ("unif" or "gauss") to use for kernel
    nsig: for Gaussian kernels, number of SDs to span in k steps
    ------------
    Returns
    ------------
    numpy array of shape [1,k]
    """
    if dist == "unif":
        return np.ones(k) * (x / k)

    elif dist == "gauss":
        t = np.linspace(-nsig, nsig, k + 1)
        return np.diff(st.norm.cdf(t)) * x

    else:
        raise ValueError('Currently supports only "unif" and "gauss"')
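

# Illustrative usage sketch (not part of the original module): both kernel
# types spread the scalar over k bins so the entries sum to (approximately) x.
#
# >>> scalar_to_vec(1.0, 4)  # uniform kernel
# array([0.25, 0.25, 0.25, 0.25])
# >>> scalar_to_vec(2.0, 5, dist="gauss").sum()
# ~1.995 (mass outside +/- nsig standard deviations is truncated)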


def sweep_components(X, n_max=None, n_min=2):
    """
    Sweeps over all values of n_components and returns a plot of losses
    vs n_components

    Parameters
    ----------
    X : Tensor
    n_max : int
        Max n in search, default X.shape[0]
    n_min : int
        Min n in search, default 2

    Returns
    -------
    fig, axes
    """
    import matplotlib.pyplot as plt
    from constrainedmf.nmf.models import NMF

    if n_max is None:
        n_max = X.shape[0]

    losses = list()
    kl_losses = list()
    for n_components in range(n_min, n_max + 1):
        nmf = NMF(X.shape, n_components)
        nmf.fit(X, beta=2, tol=1e-8, max_iter=500)
        losses.append(nmf.loss(X, beta=2))
        kl_losses.append(nmf.loss(X, beta=1))

    fig, axes = plt.subplots(1, 2)
    x = list(range(n_min, n_max + 1))
    axes[0].plot(x, losses, label="MSE Loss")
    axes[0].set_title("MSE Loss")
    axes[1].plot(x, kl_losses, label="KL Loss")
    axes[1].set_title("KL Loss")
    fig.suptitle("Loss vs # of Components")
    return fig, axes
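

# Illustrative usage sketch (not part of the original module): sweeping the
# number of components over a small dataset and saving the loss curves. The
# data and file name are hypothetical.
#
# >>> X = torch.rand(10, 100)
# >>> fig, axes = sweep_components(X, n_min=2, n_max=6)
# >>> fig.savefig("component_sweep.png")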


def iterative_nmf(
    NMFClass, X, n_components, *, beta=2, alpha=0.0, tol=1e-8, max_iter=1000, **kwargs
):
    """
    Utility for performing NMF on a stream of data along a common state variable
    (temperature or composition) that coincides with the data ordering.

    Parameters
    ----------
    NMFClass : class
        Child class of NMFBase
    X : Tensor
        Data to perform NMF on
    n_components : int
        Number of components for NMF
    beta : int
        Beta for determining loss function
    alpha : float
        Alpha for determining regularization. Default 0.0 is no regularization.
    tol : float
        Optimization tolerance
    max_iter : int
        Maximum optimization iterations
    kwargs : dict
        Passed to initialization of NMF

    Returns
    -------
    nmfs : list of NMF instances
    """
    nmfs = list()
    initial_components = [torch.rand(1, X.shape[-1]) for _ in range(n_components)]
    fix_components = [False for _ in range(n_components)]

    # Start by anchoring the outer (first and last) components to the data
    initial_components[0] = X[0, :].reshape(1, -1)
    fix_components[0] = True
    initial_components[-1] = X[-1, :].reshape(1, -1)
    fix_components[-1] = True
    nmf = NMFClass(
        X.shape,
        n_components,
        initial_components=initial_components,
        fix_components=fix_components,
        **kwargs
    )
    nmf.fit(X, beta=beta, tol=tol, max_iter=max_iter, alpha=alpha)
    nmfs.append(nmf)

    # A convolutional (NMFD) weight matrix carries an extra axis
    if len(nmf.W.shape) == 3:
        convolutional = True
    else:
        convolutional = False

    visited = {0, n_components - 1}
    for _ in range(n_components - 2):
        # Find the next most prominent weight
        if convolutional:
            indices = (
                nmf.W.sum(axis=-1).max(axis=0).values.argsort(descending=True).numpy()
            )
        else:
            indices = nmf.W.max(axis=0).values.argsort(descending=True).numpy()
        for i in indices:
            if i in visited:
                continue
            else:
                visited.add(i)
                weight_idx = i
                break

        # Find the dataset member most responsible for that weight
        if convolutional:
            pattern_idx = int(nmf.W.sum(axis=-1).argmax(axis=0)[weight_idx])
        else:
            pattern_idx = int(nmf.W.argmax(axis=0)[weight_idx])

        # Lock that component to the data and re-run the optimization
        initial_components[weight_idx] = X[pattern_idx, :].reshape(1, -1)
        fix_components[weight_idx] = True
        nmf = NMFClass(
            X.shape,
            n_components,
            initial_components=initial_components,
            fix_components=fix_components,
            **kwargs
        )
        nmf.fit(X, beta=beta, tol=tol, max_iter=max_iter, alpha=alpha)
        nmfs.append(nmf)

    return nmfs
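

# Illustrative usage sketch (not part of the original module): running the
# iterative constrained decomposition on data ordered by a state variable
# (e.g. temperature). The import path of ``NMF`` matches the one used in
# ``sweep_components``; the data shown here is hypothetical. Each successive
# model in the returned list has one more component anchored to a measured
# pattern from X.
#
# >>> from constrainedmf.nmf.models import NMF
# >>> X = torch.rand(20, 100)
# >>> models = iterative_nmf(NMF, X, n_components=4, beta=2, max_iter=500)
# >>> weights = models[-1].W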