Source code for cca_zoo.linear._gcca

from typing import Iterable, Union

import numpy as np

from cca_zoo._utils._checks import _process_parameter
from cca_zoo.linear._mcca import MCCA


class GCCA(MCCA):
    r"""
    A class used to fit GCCA model. This model extends CCA to more than two
    representations by optimizing the sum of correlations with a shared
    auxiliary vector.

    The objective function of GCCA is:

    .. math::

        w_{opt}=\underset{w}{\mathrm{argmax}}\{ \sum_iw_i^TX_i^TT  \}\\

        \text{subject to:}

        T^TT=1

    where :math:`T` is the auxiliary vector.

    Parameters
    ----------
    latent_dimensions : int, default=1
        Number of leading eigenvector columns kept when the per-view weights
        are computed.
    copy_data : bool, default=True
        Forwarded unchanged to ``MCCA``.
    random_state : optional
        Forwarded unchanged to ``MCCA``.
    c : float or iterable of float, optional
        Per-view regularisation. View ``i`` uses the blended covariance
        ``(1 - c[i]) * cov + c[i] * I``. Normalised with
        ``_process_parameter`` before fitting (presumably broadcast to one
        value per view with default 0 -- confirm against that helper).
    view_weights : iterable of float, optional
        Per-view multiplier applied to each view's contribution to the
        eigenproblem matrix. Normalised with ``_process_parameter``
        (presumably one value per view with default 1 -- confirm).
    eps : float, default=1e-6
        Margin added on top of the eigenvalue shift that keeps each
        regularised covariance invertible.

    Examples
    --------
    >>> from cca_zoo.linear import GCCA
    >>> import numpy as np
    >>> rng=np.random.RandomState(0)
    >>> X1 = rng.random((10,5))
    >>> X2 = rng.random((10,5))
    >>> X3 = rng.random((10,5))
    >>> model = GCCA()
    >>> model.fit((X1,X2,X3)).score((X1,X2,X3))

    References
    ----------
    Tenenhaus, Arthur, and Michel Tenenhaus. "Regularized generalized canonical correlation analysis." Psychometrika 76.2 (2011): 257.
    """

    def __init__(
        self,
        latent_dimensions: int = 1,
        copy_data=True,
        random_state=None,
        c: Union[Iterable[float], float, None] = None,
        view_weights: Union[Iterable[float], None] = None,
        eps: float = 1e-6,
    ):
        super().__init__(
            latent_dimensions=latent_dimensions,
            copy_data=copy_data,
            accept_sparse=["csc", "csr"],
            random_state=random_state,
            c=c,
            eps=eps,
            pca=False,
        )
        self.view_weights = view_weights

    def fit(self, views: Iterable[np.ndarray], y=None, K=None, **kwargs):
        """
        Fit the model by delegating to ``MCCA.fit``.

        Parameters
        ----------
        views : iterable of np.ndarray
            One array per view.
        y : ignored
            Passed through to the parent ``fit``.
        K : array-like of shape (n_views, n_samples), optional
            Observation mask; when it reaches ``_C`` as ``None`` an all-ones
            mask is used (every sample observed in every view).
        **kwargs
            Passed through to the parent ``fit``.

        Returns
        -------
        Whatever ``MCCA.fit`` returns (the class example chains ``.score``
        on it, so presumably the fitted estimator).
        """
        return super().fit(views, y=y, K=K, **kwargs)

    def _check_params(self):
        """Normalise ``c`` through ``_process_parameter`` (needs ``n_views_``)."""
        self.c = _process_parameter("c", self.c, 0, self.n_views_)

    def _C(self, views, K=None):
        """
        Build the square ``(n_samples, n_samples)`` matrix whose eigenvectors
        are used by ``_weights``.

        Each view contributes ``view_weight * X @ inv(cov) @ X.T`` where
        ``cov`` is the regularised covariance of the view; the sum is then
        scaled on rows and columns by ``sqrt(sum(K, axis=0))``.

        Parameters
        ----------
        views : sequence of np.ndarray
            One ``(n_samples, n_features_i)`` array per view.
        K : array-like of shape (n_views, n_samples), optional
            Observation mask. Defaults to all ones.

        Returns
        -------
        np.ndarray of shape (n_samples, n_samples)

        Raises
        ------
        ValueError
            If no view is supplied or ``K`` does not yield one entry per
            sample.
        """
        if K is None:
            # just use identity when all rows are observed in all representations.
            K = np.ones((len(views), views[0].shape[0]))
        self.view_weights = _process_parameter(
            "view_weights", self.view_weights, 1, self.n_views_
        )

        # Running sum: avoids keeping one n x n matrix per view alive plus
        # the stacked copy that np.sum over a list would build.
        Q = None
        for i, (view, view_weight) in enumerate(zip(views, self.view_weights)):
            view_cov = (1 - self.c[i]) * np.cov(view, rowvar=False) + self.c[
                i
            ] * np.eye(view.shape[1])
            # Shift the spectrum so the smallest eigenvalue is at least eps
            # above zero, which keeps the inverse below well defined.
            smallest_eig = min(0, np.linalg.eigvalsh(view_cov).min()) - self.eps
            view_cov = view_cov - smallest_eig * np.eye(view_cov.shape[0])
            term = view_weight * view @ np.linalg.inv(view_cov) @ view.T
            Q = term if Q is None else Q + term
        if Q is None:
            raise ValueError("GCCA requires at least one view.")

        # sqrt of the number of views observing each sample, computed once.
        scale = np.sqrt(np.sum(K, axis=0))
        if scale.shape != (Q.shape[0],):
            # Broadcasting would otherwise silently accept a mis-shaped mask.
            raise ValueError(
                "K must have shape (n_views, n_samples) with n_samples="
                f"{Q.shape[0]}; summing over views gave shape {scale.shape}."
            )
        # Same values as diag(scale) @ Q @ diag(scale), but O(n^2) and without
        # materialising two dense diagonal matrices.
        return scale[:, np.newaxis] * Q * scale[np.newaxis, :]

    def _D(self, views, **kwargs):
        """Return ``None``: GCCA supplies no second matrix for the eigenproblem."""
        return None

    def _weights(self, eigvals, eigvecs, views, **kwargs):
        """
        Store per-view weights in ``self.weights_``.

        Each view's weights are its pseudo-inverse applied to the first
        ``latent_dimensions`` columns of ``eigvecs``. ``eigvals`` is unused.
        """
        self.weights_ = [
            np.linalg.pinv(view) @ eigvecs[:, : self.latent_dimensions]
            for view in views
        ]

    def _more_tags(self):
        """Return the estimator tag marking this model as multiview."""
        return {"multiview": True}