Source code for tvb.core.entities.file.file_update_scripts.002_update_files

# -*- coding: utf-8 -*-
# TheVirtualBrain-Framework Package. This package holds all Data Management, and 
# Web-UI helpful to run brain-simulations. To use it, you also need to download
# TheVirtualBrain-Scientific Package (for simulators). See content of the
# documentation-folder for more details. See also
# (c) 2012-2024, Baycrest Centre for Geriatric Care ("Baycrest") and others
# This program is free software: you can redistribute it and/or modify it under the
# terms of the GNU General Public License as published by the Free Software Foundation,
# either version 3 of the License, or (at your option) any later version.
# This program is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
# PARTICULAR PURPOSE.  See the GNU General Public License for more details.
# You should have received a copy of the GNU General Public License along with this
# program.  If not, see <http://www.gnu.org/licenses/>.
# When using The Virtual Brain for scientific publications, please cite it as explained here:

"""
.. moduleauthor:: Bogdan Neacsa <>
"""
from datetime import datetime
import os
import sys
import importlib
import tables
import h5py
import numpy
from tvb.basic.profile import TvbProfile
from tvb.core.utils import string2bool, string2date, date2string
from import FileVersioningException

# Maximum number of array elements read at once: 50,000,000 bytes divided by 8, i.e. about
# 50 MB when assuming the worst case of float64 (8 bytes per element).
# NOTE(review): this comment previously said "500 MB", which does not match the value
# below - confirm which of the two was intended.
DATA_BUFFER_SIZE = 50000000 / 8

# ---------------------- TVB 1.0 Specific constants and functions start here --------------------------
# We duplicate these constants here since they were the ones used in TVB 1.0 and
# this upgrade script needs to be able to convert any data that was created in 1.0 to 1.0.1.
# Since this script needs to execute properly even in a later version (e.g. 2.0) it's not
# feasible to use any constants currently declared in TVB, since those might change over time.

# Format passed to date2string / string2date when (de)serializing datetime meta-data values.
DATE_TIME_FORMAT = '%Y-%m-%d %H:%M:%S.%f'
# Alternative date format; not referenced by the code visible in this script.
COMPLEX_TIME_FORMAT = '%Y-%m-%d,%H-%M-%S.%f'

def _serialize_value(value):
    This method takes a value which will be stored as meta-data and 
    apply some transformation if necessary
    :param value:  value which is planned to be stored
    :returns: value to be stored
    NOTE: this method was a part of TVB 1.0 hdf5storage manager, but since this
    script needs to be independent of current storage manager, we duplicate it here. 
    if value is None:
        return ''
    # Transform boolean to string and prefix it
    if isinstance(value, bool):
        return BOOL_VALUE_PREFIX + str(value)
    # Transform date to string and append prefix
    elif isinstance(value, datetime):
        return DATETIME_VALUE_PREFIX + date2string(value, date_format=DATE_TIME_FORMAT)
        return value

def _deserialize_value(value):
    This method takes value loaded from H5 file and transform it to TVB data. 
    :param value: the value that was read from the H5 file
    :returns: a TVB specific deserialized value of the input
    NOTE: this method was a part of TVB 1.0 hdf5storage manager, but since this
    script needs to be independent of current storage manager, we duplicate it here. 
    if value is not None:
        if isinstance(value, numpy.string_):
            if len(value) == 0:
                value = None
                value = str(value)
        if isinstance(value, str):
            if value.startswith(BOOL_VALUE_PREFIX):
                # Remove bool prefix and transform to bool
                return string2bool(value[len(BOOL_VALUE_PREFIX):])
            if value.startswith(DATETIME_VALUE_PREFIX):
                # Remove datetime prefix and transform to datetime
                return string2date(value[len(DATETIME_VALUE_PREFIX):], date_format=DATE_TIME_FORMAT)
    return value
# ---------------------- TVB 1.0 Specific constants and functions end here --------------------------

def _copy_array_node(tables_node, h5py_h5_file, node_path):
    """
    Copy the data of one pyTables array node into a newly created h5py dataset.

    Data that fits into DATA_BUFFER_SIZE elements is copied in one go; larger arrays
    are copied slice by slice along their largest dimension.

    :param tables_node: the pyTables array node to read from
    :param h5py_h5_file: the open h5py file to write into
    :param node_path: name of the dataset to create in the h5py file
    :returns: the h5py dataset that was created
    """
    node_shape = tables_node.shape
    max_dimension = 0
    total_size = 1
    for idx, val in enumerate(node_shape):
        if val > node_shape[max_dimension]:
            max_dimension = idx
        total_size = total_size * val

    if total_size <= DATA_BUFFER_SIZE:
        # We did not pass our buffer size, so it's safe to just read/write the whole data at once
        node_data = tables_node.read()
        return h5py_h5_file.create_dataset(node_path, data=node_data,
                                           shape=node_data.shape, dtype=node_data.dtype)

    # We need to read in chunks. Set the dimension that is growable to None
    node_shape_list = list(node_shape)
    node_shape_list[max_dimension] = None
    # Pass the source dtype explicitly: without it h5py creates a float32 dataset,
    # which would silently change the type of the copied data.
    h5py_node = h5py_h5_file.create_dataset(node_path, shape=node_shape, dtype=tables_node.dtype,
                                            maxshape=tuple(node_shape_list))
    # Number of indices along the largest dimension that fit into one buffer (at least 1)
    slice_size = max(int(DATA_BUFFER_SIZE * node_shape[max_dimension] / total_size), 1)
    data_slice = [slice(None, None, None) for _ in node_shape]
    for idx in range(0, node_shape[max_dimension], slice_size):
        data_slice[max_dimension] = slice(idx, idx + slice_size, 1)
        h5py_node[tuple(data_slice)] = tables_node[tuple(data_slice)]
    return h5py_node


def __upgrade_file(input_file_name, output_file_name):
    """
    Convert an input file stored in the TVB 1.0 (pyTables) format into an output file
    written with h5py and marked with data version 2.

    Every pyTables node is visited; its user attributes are deserialized and written
    again as attributes of the matching h5py node. Node paths are flattened by removing
    every '/' from the pyTables path. An already existing output file is replaced.

    NOTE: This should not be used directly since the simultaneous use of pyTables and h5py
    causes segmentation faults on some setups (Debian 32/64, Fedora 64, Windows 64) on file
    open/close. (Probably caused by some GIL / C level incompatibilities). Instead of this
    use the `update(input_file)` which will call this method in a separate Python process.

    :param input_file_name: the path to an input *.h5 file from TVB 1.0 using pyTables format
        for storage
    :param output_file_name: the path to an output *.h5 that will be written in h5py TVB 1.0.1
        specific format
    """
    # NOTE(review): several statements of this function were truncated in the reviewed text
    # (both node class checks, the whole-array read, the removal of an existing output file
    # and the closing/reload statements). They are reconstructed from the surrounding
    # comments and usage - confirm against the project history.
    # PEP 8 style PyTables names (open_file / walk_nodes / get_node_attr) are used; they
    # exist in every PyTables release that supports Python 3.
    tables_h5_file = tables.open_file(input_file_name, 'r')
    h5py_h5_file = None
    try:
        if os.path.exists(output_file_name):
            os.remove(output_file_name)
        h5py_h5_file = h5py.File(output_file_name, 'a')

        # Iterate through all pyTables nodes
        for tables_node in tables_h5_file.walk_nodes():
            node_path = tables_node._v_pathname.replace('/', '')

            # Get meta-data from the pyTables node. This does not change for root/group/Carray nodes
            node_metadata = {}
            for meta_key in tables_node._v_attrs._f_list('user'):
                value = tables_h5_file.get_node_attr(tables_node, meta_key)
                node_metadata[meta_key] = _deserialize_value(value)

            # RootGroup is tested first because it is itself a kind of Group
            if isinstance(tables_node, tables.group.RootGroup):
                # For the root the node is already created in the h5py equivalent
                h5py_node = h5py_h5_file['/']
            elif isinstance(tables_node, tables.group.Group):
                # For groups just create an empty data-set since it's easier to handle
                # than sub-groups.
                h5py_node = h5py_h5_file.create_dataset(node_path, (1,))
            else:
                # We have a standard node (Carray)
                h5py_node = _copy_array_node(tables_node, h5py_h5_file, node_path)

            for meta_key, meta_value in node_metadata.items():
                h5py_node.attrs[meta_key] = _serialize_value(meta_value)

        h5py_h5_file['/'].attrs[TVB_ATTRIBUTE_PREFIX + DATA_VERSION_ATTRIBUTE] = 2
    finally:
        # Always release both files, even when the conversion fails half way
        tables_h5_file.close()
        # Reloading h5py seems to fix the segmentation fault that used to appear.
        importlib.reload(h5py)
        if h5py_h5_file is not None:
            h5py_h5_file.close()

def update(input_file, burst_match_dict=None):
    """
    In order to avoid segmentation faults when updating a batch of files just start every
    conversion on a different Python process.

    The input file is first renamed to a temporary ``tmp_<name>`` file in the same folder.
    This module is then run as a script in a child process, reading the temporary file and
    writing the converted result to the original path. On success the temporary file is
    removed; on failure the original file is put back in place.

    :param input_file: the file that needs to be converted to a newer file storage version.
        This should be a file that still uses TVB 1.0 storage (pyTables)
    :param burst_match_dict: not used by this function
    :raises FileVersioningException: when `input_file` is not an existing file, or when
        the conversion process does not finish successfully
    """
    # Imported locally so the block does not depend on a new file-level import
    import subprocess

    if not os.path.isfile(input_file):
        raise FileVersioningException("The input path %s received for upgrading from 1 -> 2 is not a "
                                      "valid file on the disk." % input_file)

    # Just to avoid any problems about renaming open files, do a rename from the start
    # and in case of a fault in the child process just rename back and remove the output file.
    path_to, file_name = os.path.split(input_file)
    tmp_convert_file = os.path.join(path_to, 'tmp_' + file_name)
    os.rename(input_file, tmp_convert_file)

    # The arguments are passed as a list (no shell involved), so paths containing spaces
    # or shell meta-characters need no special treatment.
    command = [PYTHON_EXE_PATH, '-m', __name__, tmp_convert_file, input_file]
    try:
        exit_status = subprocess.call(command)
    except OSError:
        # The interpreter could not be started at all; handle it like a failed conversion
        exit_status = -1

    if exit_status == 0:
        # Call finished successfully
        os.remove(tmp_convert_file)
        return

    # Call failed for some reason: drop any partially written output and
    # rename back the input file
    if os.path.exists(input_file):
        os.remove(input_file)
    os.rename(tmp_convert_file, input_file)
    raise FileVersioningException("Something went wrong during the upgrade to file %s." % input_file)
### This main is important, and used by the update() method from above.
### Do not drop this __main__
if __name__ == '__main__':
    # Expected command line: <module> input_file_name output_file_name
    if len(sys.argv) != 3:
        raise FileVersioningException("Usage is `python -m tvb.core.entities.file.file_update_scripts.002_update_files"
                                      " input_file_name output_file_name`.")
    input_file = sys.argv[1]
    output_file = sys.argv[2]
    __upgrade_file(input_file, output_file)