# Source code for cca_zoo.models.partialcca

from typing import Iterable, Union

import numpy as np
from scipy.linalg import block_diag
from sklearn.utils.validation import check_is_fitted

from cca_zoo.models import MCCA
from cca_zoo.utils import _check_views


class PartialCCA(MCCA):
    r"""
    A class used to fit a partial CCA model. The key difference between this and a vanilla CCA or MCCA is that
    the canonical score vectors must be orthogonal to the supplied confounding variables.

    :Citation:

    Rao, B. Raja. "Partial canonical correlations." Trabajos de estadistica y de investigación operativa 20.2-3 (1969): 211-219.

    :Example:

    >>> import numpy as np
    >>> from cca_zoo.models import PartialCCA
    >>> X1 = np.random.rand(10,5)
    >>> X2 = np.random.rand(10,5)
    >>> confounds = np.random.rand(10,3)
    >>> model = PartialCCA()
    >>> model.fit((X1,X2),confounds=confounds).score((X1,X2),confounds=confounds)
    array([0.99993046])

    """

    def __init__(
        self,
        latent_dims: int = 1,
        scale: bool = True,
        centre: bool = True,
        copy_data: bool = True,
        random_state=None,
        c: Union[Iterable[float], float, None] = None,
        eps: float = 1e-3,
    ):
        """
        Constructor for Partial CCA

        :param latent_dims: number of latent dimensions to fit
        :param scale: normalize variance in each column before fitting
        :param centre: demean data by column before fitting (and before transforming out of sample)
        :param copy_data: If True, X will be copied; else, it may be overwritten
        :param random_state: Pass for reproducible output across multiple function calls
        :param c: Iterable of regularisation parameters for each view (between 0:CCA and 1:PLS)
        :param eps: epsilon for stability
        """
        super().__init__(
            latent_dims=latent_dims,
            scale=scale,
            centre=centre,
            copy_data=copy_data,
            random_state=random_state,
        )
        self.c = c
        self.eps = eps

    def _setup_evp(self, views: Iterable[np.ndarray], confounds=None):
        """
        Regress the confounds out of every view and build the matrices ``C`` and ``D``.

        Stores the per-view regression coefficients in ``self.confound_betas`` and the
        cumulative column offsets of the views in ``self.splits``.

        NOTE(review): relies on ``self.n`` and an indexable ``self.c`` having been set
        before this is called (presumably by the parent class) -- confirm against MCCA.

        :param views: arrays sharing the same number of rows (samples)
        :param confounds: confounding variables with one row per sample
        :return: tuple ``(views, C, D)`` where ``views`` are the residualised views
        :raises ValueError: if ``confounds`` is None
        """
        if confounds is None:
            raise ValueError(
                f"confounds is {confounds}. Require matching confounds to transform with "
                f"partial CCA."
            )
        # The pseudo-inverse does not depend on the view, so compute it once.
        confounds_pinv = np.linalg.pinv(confounds)
        # Least-squares coefficients of each view regressed on the confounds.
        self.confound_betas = [confounds_pinv @ view for view in views]
        # Residualise exactly as transform() does: view - confounds @ beta.
        views = [
            view - confounds @ confound_beta
            for view, confound_beta in zip(views, self.confound_betas)
        ]
        all_views = np.concatenate(views, axis=1)
        C = all_views.T @ all_views / self.n
        # Within-view second-moment matrices, reused for both D and the correction of C.
        within_view = [view.T @ view / self.n for view in views]
        # Can regularise by adding to diagonal
        D = block_diag(
            *[
                (1 - self.c[i]) * cov + self.c[i] * np.eye(cov.shape[0])
                for i, cov in enumerate(within_view)
            ]
        )
        # Swap the unregularised diagonal blocks of C for the regularised ones.
        C -= block_diag(*within_view) - D
        # Shift D so that its smallest eigenvalue is at least eps.
        D_smallest_eig = min(0, np.linalg.eigvalsh(D).min()) - self.eps
        D = D - D_smallest_eig * np.eye(D.shape[0])
        self.splits = np.cumsum([0] + [view.shape[1] for view in views])
        return views, C, D

    # TODO TRANSFORM
    def transform(self, views: Iterable[np.ndarray], y=None, confounds=None):
        """
        Transforms data given a fit model

        :param views: numpy arrays with the same number of rows (samples) separated by commas
        :param y: not used by this method
        :param confounds: confounding variables with one row per sample in ``views``
        :return: list holding one projected array per view
        :raises ValueError: if ``confounds`` is None
        """
        if confounds is None:
            raise ValueError(
                f"confounds is {confounds}. Require matching confounds to transform with "
                f"partial CCA."
            )
        check_is_fitted(self, attributes=["weights"])
        views = _check_views(
            *views, copy=self.copy_data, accept_sparse=self.accept_sparse
        )
        views = self._centre_scale_transform(views)
        # Remove the fitted confound contribution from each view, then project.
        return [
            (view - confounds @ self.confound_betas[i]) @ self.weights[i]
            for i, view in enumerate(views)
        ]