Source code for cca_zoo.linear._iterative._scca_span

from typing import Union, Iterable

import numpy as np

from cca_zoo._utils._checks import _process_parameter
from cca_zoo._utils._cross_correlation import cross_cov
from cca_zoo.linear._iterative._base import _BaseIterative
from cca_zoo.linear._iterative._deflation import _DeflationMixin
from cca_zoo.linear._search import _delta_search
from cca_zoo.linear._search import support_threshold


class SCCA_Span(_DeflationMixin, _BaseIterative):
    r"""
    Fits a Sparse CCA model using SpanCCA.

    .. math::

        w_{opt}=\underset{w}{\mathrm{argmax}}\{\sum_i\sum_{j\neq i} \|X_iw_i-X_jw_j\|^2 + \text{l1_ratio}\|w_i\|_1\}\\

        \text{subject to:}

        w_i^TX_i^TX_iw_i=1

    Parameters
    ----------
    latent_dimensions, epochs, copy_data, initialization, tol, random_state, verbose, early_stopping
        Forwarded unchanged to the base class constructor.
    regularisation : {"l0", "l1"}
        Selects the per-view update function: ``"l0"`` uses
        ``support_threshold`` and ``"l1"`` uses ``_delta_search``.
    tau
        Passed as the second argument of the update function, one entry per
        view (``tau[0]`` for the first view, ``tau[1]`` for the second).
    rank : int
        Number of leading singular triplets of the cross-covariance that are
        retained.
    positive
        Stored and normalised per view in ``_check_params``; it is not read
        anywhere else in this class.

    References
    ----------
    Asteris, Megasthenis, et al. "A simple and provable algorithm for sparse diagonal CCA." International Conference on Machine Learning. PMLR, 2016.
    """

    def __init__(
        self,
        latent_dimensions: int = 1,
        epochs: int = 100,
        copy_data=True,
        initialization: str = "pls",
        tol: float = 1e-3,
        regularisation="l0",
        tau: Union[Iterable[Union[float, int]], Union[float, int]] = None,
        rank=1,
        positive: Union[Iterable[bool], bool] = None,
        random_state=None,
        verbose=True,
        early_stopping=False,
    ):
        super().__init__(
            latent_dimensions=latent_dimensions,
            epochs=epochs,
            copy_data=copy_data,
            initialization=initialization,
            tol=tol,
            random_state=random_state,
            verbose=verbose,
            early_stopping=early_stopping,
        )
        self.tau = tau
        self.regularisation = regularisation
        self.rank = rank
        self.positive = positive

    def _check_params(self):
        """Validate the configuration and resolve per-view parameters.

        Raises:
            ValueError: If the number of views is not exactly 2, or if
                ``regularisation`` is neither ``"l0"`` nor ``"l1"``.
        """
        if self.n_views_ != 2:
            raise ValueError("SCCA_Span requires only 2 representations")
        self.max_obj = 0
        if self.regularisation == "l0":
            self.update = support_threshold
        elif self.regularisation == "l1":
            self.update = _delta_search
        else:
            # Fail here with a clear message; otherwise ``self.update`` stays
            # unset and the error only appears later as an AttributeError
            # inside ``_update_weights``.
            raise ValueError(
                "regularisation must be 'l0' or 'l1', "
                f"got {self.regularisation!r}"
            )
        # NOTE(review): presumably expands a scalar/None into one value per
        # view using the given default (1 for tau, False for positive) --
        # confirm against _process_parameter.
        self.tau = _process_parameter("tau", self.tau, 1, self.n_views_)
        self.positive = _process_parameter(
            "positive", self.positive, False, self.n_views_
        )

    def _update_weights(self, views: np.ndarray, i: int) -> Union[np.ndarray, None]:
        """Compute updated weights for index ``i``.

        Args:
            views (np.ndarray): The input representations as numpy arrays.
            i (int): Selects which update is performed: ``0`` updates from a
                random direction using ``tau[0]``; ``1`` updates from
                ``weights_[0]`` using ``tau[1]``.
                NOTE(review): the previous docstring called this "the index
                of the component", but the code uses it like a view index --
                confirm against the base class.

        Returns:
            The normalised weight vector for ``i`` in ``{0, 1}`` (with a
            trailing axis added when ``i == 0``); ``None`` for any other ``i``.
        """
        # Lazily compute the truncated SVD factors the first time through.
        if getattr(self, "P", None) is None:
            self._initialize_variables(views)
        if i == 0:
            # Random unit vector c in the rank-dimensional singular subspace.
            # NOTE(review): assumes the base class has turned ``random_state``
            # into an object exposing ``randn`` -- confirm.
            c = self.random_state.randn(self.rank)
            c /= np.linalg.norm(c)
            # a = P D c, associated right-to-left so only matrix-vector
            # products are performed.
            a = self.P @ (np.diag(self.D) @ c)
            u = self.update(a, self.tau[0])
            u /= np.linalg.norm(u)
            # Return as a column (adds a trailing axis to the 1-D result).
            return u[:, np.newaxis]
        if i == 1:
            # b = Q D P^T w_0. Evaluating right-to-left avoids materialising
            # the (q x p) product Q D P^T on every call.
            b = self.Q @ (np.diag(self.D) @ (self.P.T @ self.weights_[0]))
            v = self.update(b, self.tau[1])
            v /= np.linalg.norm(v)
            return v
        # Any other index falls through without an update.
        return None

    def _initialize_variables(self, views):
        """Compute and cache the rank-truncated SVD of the cross-covariance.

        Sets ``self.P`` (left singular vectors as columns), ``self.D``
        (singular values) and ``self.Q`` (right singular vectors as columns),
        each truncated to the leading ``self.rank`` components. Also resets
        ``self.max_obj`` to ``[0, 0]``.

        Args:
            views: Sequence whose first two entries are the two views.
        """
        self.max_obj = [0, 0]
        cov = cross_cov(views[0], views[1], rowvar=False)
        # Only the leading ``rank`` singular triplets are kept, so the reduced
        # SVD is sufficient and avoids building the square (p x p) and
        # (q x q) factors.
        left, singular_values, right_h = np.linalg.svd(cov, full_matrices=False)
        self.P = left[:, : self.rank]
        self.D = singular_values[: self.rank]
        # numpy returns V^H (right singular vectors as rows); transpose so
        # they are stored as columns, matching P.
        self.Q = right_h[: self.rank, :].T