Source code for tvb.analyzers.pca

# -*- coding: utf-8 -*-
#
#
# TheVirtualBrain-Scientific Package. This package holds all simulators, and
# analysers necessary to run brain-simulations. You can use it stand alone or
# in conjunction with TheVirtualBrain-Framework Package. See content of the
# documentation-folder for more details. See also http://www.thevirtualbrain.org
#
# (c) 2012-2023, Baycrest Centre for Geriatric Care ("Baycrest") and others
#
# This program is free software: you can redistribute it and/or modify it under the
# terms of the GNU General Public License as published by the Free Software Foundation,
# either version 3 of the License, or (at your option) any later version.
# This program is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
# PARTICULAR PURPOSE.  See the GNU General Public License for more details.
# You should have received a copy of the GNU General Public License along with this
# program.  If not, see <http://www.gnu.org/licenses/>.
#
#
#   CITATION:
# When using The Virtual Brain for scientific publications, please cite it as explained here:
# https://www.thevirtualbrain.org/tvb/zwei/neuroscience-publications
#
#

"""
Perform Principal Component Analysis (PCA) on a TimeSeries datatype and return
a PrincipalComponents datatype.

.. moduleauthor:: Stuart A. Knock <Stuart@tvb.invalid>

"""

import numpy
import tvb.datatypes.mode_decompositions as mode_decompositions
from tvb.basic.logger.builder import get_logger
from tvb.basic.neotraits.api import HasTraits, Attr, narray_describe

log = get_logger(__name__)


def _compute_weights_and_fractions(data):
    """
    The code for this function has been taken and adapted from Matplotlib 2.1.0
    Aug 2019
    """
    n, m = data.shape
    if n < m:
        raise RuntimeError('we assume data in input is organized with '
                            'numrows>numcols')

    mu = data.mean(axis=0)
    sigma = data.std(axis=0)

    a = (data - mu) / sigma
    U, s, Vh = numpy.linalg.svd(a, full_matrices=False)
    Wt = Vh
    s = s ** 2

    vars = s / len(s)
    fracs = vars / vars.sum()

    return fracs, Wt

"""
Return principal component weights and the fraction of the variance that 
they explain. 
    
PCA takes time-points as observations and nodes as variables.
    
NOTE: The TimeSeries must be longer(more time-points) than the number of
        nodes -- Mostly a problem for TimeSeriesSurface datatypes, which, if 
        sampled at 1024Hz, would need to be greater than 16 seconds long.
"""


# # TODO: Maybe should support first N components or neccessary components to
# #      explain X% of the variance. NOTE: For default surface the weights
# #      matrix has a size ~ 2GB * modes * vars...
[docs] def compute_pca(time_series): """ # type: (TimeSeries) -> PrincipalComponents Compute the temporal covariance between nodes in the time_series. Parameters __________ time_series : TimeSeries The timeseries to which the PCA is to be applied. """ ts_shape = time_series.data.shape # Need more measurements than variables if ts_shape[0] < ts_shape[2]: msg = "PCA requires a longer timeseries (tpts > number of nodes)." log.error(msg) raise Exception(msg) # (nodes, nodes, state-variables, modes) weights_shape = (ts_shape[2], ts_shape[2], ts_shape[1], ts_shape[3]) log.info("weights shape will be: %s" % str(weights_shape)) fractions_shape = (ts_shape[2], ts_shape[1], ts_shape[3]) log.info("fractions shape will be: %s" % str(fractions_shape)) weights = numpy.zeros(weights_shape) fractions = numpy.zeros(fractions_shape) # One inter-node temporal covariance matrix for each state-var & mode. for mode in range(ts_shape[3]): for var in range(ts_shape[1]): data = time_series.data[:, var, :, mode] fracts, w = _compute_weights_and_fractions(data) fractions[:, var, mode] = fracts weights[:, :, var, mode] = w log.debug("fractions") log.debug(narray_describe(fractions)) log.debug("weights") log.debug(narray_describe(weights)) pca_result = mode_decompositions.PrincipalComponents( source=time_series, fractions=fractions, weights=weights, norm_source=numpy.array([]), component_time_series=numpy.array([]), normalised_component_time_series=numpy.array([])) return pca_result