# Source code for cca_zoo.linear._gcca

from typing import Iterable, Union

import numpy as np

from cca_zoo._utils._checks import _process_parameter
from cca_zoo.linear._mcca import MCCA



# [docs]
class GCCA(MCCA):
    r"""
    Generalized Canonical Correlation Analysis (GCCA).

    This model extends CCA to more than two representations by optimizing the
    sum of correlations with a shared auxiliary vector.

    The objective function of GCCA is:

    .. math::

        w_{opt}=\underset{w}{\mathrm{argmax}}\{ \sum_iw_i^TX_i^TT  \}\\

        \text{subject to:}

        T^TT=1

    where :math:`T` is the auxiliary vector.

    Parameters
    ----------
    latent_dimensions : int, default=1
        Number of leading eigenvector columns used to build ``weights_``.
        Also forwarded to ``MCCA.__init__``.
    copy_data : bool, default=True
        Forwarded unchanged to ``MCCA.__init__``.
    random_state : optional
        Forwarded unchanged to ``MCCA.__init__``.
    c : float or iterable of float, optional
        Regularisation, indexed per view. Each view covariance is blended as
        ``(1 - c[i]) * cov_i + c[i] * I``. The value is normalised in
        ``_check_params`` via ``_process_parameter`` (called with a default
        of 0); presumably this yields one value per view.
    view_weights : iterable of float, optional
        Multiplier applied to each view's contribution to the matrix built in
        ``_C``. Normalised via ``_process_parameter`` (called with a default
        of 1); presumably this yields one value per view.
    eps : float, default=1e-6
        Margin used when shifting each regularised view covariance so that
        its smallest eigenvalue is at least ``eps``.

    Examples
    --------
    >>> from cca_zoo.linear import GCCA
    >>> import numpy as np
    >>> rng=np.random.RandomState(0)
    >>> X1 = rng.random((10,5))
    >>> X2 = rng.random((10,5))
    >>> X3 = rng.random((10,5))
    >>> model = GCCA()
    >>> model.fit((X1,X2,X3)).score((X1,X2,X3))

    References
    ----------
    Tenenhaus, Arthur, and Michel Tenenhaus. "Regularized generalized canonical correlation analysis." Psychometrika 76.2 (2011): 257.
    """

    def __init__(
        self,
        latent_dimensions: int = 1,
        copy_data=True,
        random_state=None,
        c: Union[Iterable[float], float, None] = None,
        view_weights: Union[Iterable[float], None] = None,
        eps: float = 1e-6,
    ):
        super().__init__(
            latent_dimensions=latent_dimensions,
            copy_data=copy_data,
            accept_sparse=["csc", "csr"],
            random_state=random_state,
            c=c,
            eps=eps,
            pca=False,
        )
        self.view_weights = view_weights

    def fit(self, views: Iterable[np.ndarray], y=None, K=None, **kwargs):
        """
        Fit the model by delegating to ``MCCA.fit``.

        Parameters
        ----------
        views : iterable of np.ndarray
            One array per view, with samples in rows and features in columns
            (``_C`` computes covariances with ``rowvar=False``).
        y : optional
            Not used here; forwarded to the parent ``fit``.
        K : array-like, optional
            Forwarded to the parent ``fit``. ``_C`` treats it as an array
            with one row per view and one column per sample, and defaults
            to all ones when it is ``None``.
            NOTE(review): presumably an observation mask - confirm.
        **kwargs
            Forwarded to the parent ``fit``.

        Returns
        -------
        Whatever ``MCCA.fit`` returns (presumably the fitted estimator).
        """
        return super().fit(views, y=y, K=K, **kwargs)

    def _check_params(self):
        """Normalise ``c`` with ``_process_parameter`` (default 0)."""
        self.c = _process_parameter("c", self.c, 0, self.n_views_)

    def _C(self, views, K=None):
        """
        Build the square (n_samples x n_samples) matrix for GCCA.

        For every view ``X_i`` the term ``w_i * X_i @ inv(S_i) @ X_i.T`` is
        computed, where ``S_i`` is the regularised view covariance shifted to
        be positive definite. The terms are summed, and the rows and columns
        of the sum are scaled by ``sqrt(sum(K, axis=0))``.

        Parameters
        ----------
        views : sequence of np.ndarray
            One array per view, samples in rows.
        K : array-like, optional
            Array with one row per view and one column per sample. When
            ``None``, an all-ones array is used.

        Returns
        -------
        np.ndarray
            The scaled sum described above.
        """
        if K is None:
            # just use identity when all rows are observed in all representations.
            K = np.ones((len(views), views[0].shape[0]))
        # The normalised weights are stored back on the instance, as before.
        self.view_weights = _process_parameter(
            "view_weights", self.view_weights, 1, self.n_views_
        )
        Q = []
        for i, (view, view_weight) in enumerate(zip(views, self.view_weights)):
            view_cov = (1 - self.c[i]) * np.cov(view, rowvar=False) + self.c[
                i
            ] * np.eye(view.shape[1])
            # Shift the spectrum so the smallest eigenvalue is at least eps;
            # this keeps the linear solve below well defined.
            smallest_eig = min(0, np.linalg.eigvalsh(view_cov).min()) - self.eps
            view_cov = view_cov - smallest_eig * np.eye(view_cov.shape[0])
            # Solve S_i Z = X_i.T rather than forming inv(S_i) explicitly.
            Q.append(view_weight * (view @ np.linalg.solve(view_cov, view.T)))
        Q = np.sum(Q, axis=0)
        # Equivalent to diag(s) @ Q @ diag(s), done by broadcasting so that no
        # dense diagonal matrices or extra matrix products are needed.
        scale = np.sqrt(np.sum(K, axis=0))
        return scale[:, np.newaxis] * Q * scale[np.newaxis, :]

    def _D(self, views, **kwargs):
        """
        Return ``None``.

        NOTE(review): presumably tells the parent class that there is no
        right-hand-side matrix (a standard eigenproblem) - confirm in MCCA.
        """
        return None

    def _weights(self, eigvals, eigvecs, views, **kwargs):
        """
        Set ``weights_`` from the leading eigenvectors.

        Each view's weights are ``pinv(view)`` applied to the first
        ``latent_dimensions`` columns of ``eigvecs``. ``eigvals`` is accepted
        but not used.
        """
        leading = eigvecs[:, : self.latent_dimensions]
        self.weights_ = [np.linalg.pinv(view) @ leading for view in views]

    def _more_tags(self):
        """Return the estimator tags, marking this model as multiview."""
        return {"multiview": True}