fedbiomed.researcher.filetools

Module: fedbiomed.researcher.filetools

Functions for managing Job/Experiment files.

Functions

choose_bkpt_file(experimentation_folder, round_=0)

It creates a breakpoint folder and chooses a breakpoint file name for each round.

Parameters:

Name Type Description Default
experimentation_folder str

indicates the experimentation folder name. This should just contain the name of the folder not a full path.

required
round_ int

the current number of already run rounds minus one. Starts from 0. Defaults to 0.

0

Raises:

Type Description
PermissionError

cannot create experimentation folder

OSError

cannot create experimentation folder

Returns:

Type Description
Tuple[str, str]

A tuple that contains the following items: breakpoint_folder_path, the path of the created folder that will contain all data for the current round; and breakpoint_file, the name of the file that will contain the state of an experiment.

Source code in fedbiomed/researcher/filetools.py
def choose_bkpt_file(experimentation_folder: str, round_: int = 0) -> Tuple[str, str]:
    """
    Creates the breakpoint folder of a round and picks the name of its state file.

    The folder is named `breakpoint_XXXX`, where `XXXX` is `round_` zero-padded to four digits.
    It is created (if it does not exist yet) under `environ['EXPERIMENTS_DIR']/experimentation_folder`.

    Args:
        experimentation_folder: name of the experimentation folder. This should be a bare folder name,
            not a full path.
        round_: round index used to number the breakpoint. Starts from 0. Defaults to 0.

    Raises:
        PermissionError: cannot create the breakpoint folder
        OSError: cannot create the breakpoint folder

    Returns:
        A tuple that contains
            breakpoint_folder_path: path of the created folder that will hold the data of this round
            breakpoint_file: name (`breakpoint_XXXX.json`) of the file that will hold the experiment state.
    """
    bkpt_name = "breakpoint_" + format(round_, "04d")
    bkpt_dir = os.path.join(environ['EXPERIMENTS_DIR'], experimentation_folder, bkpt_name)

    try:
        os.makedirs(bkpt_dir, exist_ok=True)
    except OSError as err:
        # PermissionError is a subclass of OSError, so both documented errors land here
        logger.error(f"Can not save breakpoint folder at {bkpt_dir} due to some error {err}")
        raise

    return bkpt_dir, bkpt_name + ".json"

copy_file(filepath, breakpoint_path)

Source code in fedbiomed/researcher/filetools.py
def copy_file(filepath: str, breakpoint_path: str) -> str:
    """ Copies a file into a breakpoint folder, keeping the base name of the file.

    Args:
        filepath: path of the file to copy
        breakpoint_path: path of the destination (breakpoint) folder

    Returns:
        Path of the copy, i.e. `breakpoint_path` joined with the base name of `filepath`

    Raises:
        OSError: the file cannot be read or the copy cannot be written (raised by `shutil.copy2`)
    """
    # the copy keeps the file name only: joining with the *directory* of `filepath`
    # would point back at the source location (or at a meaningless path)
    filename = os.path.basename(filepath)
    file_copy_path = os.path.join(breakpoint_path, filename)
    shutil.copy2(filepath, file_copy_path)
    return file_copy_path

create_exp_folder(experimentation_folder=None)

Creates a folder for the current experiment (ie the current run of the model). Experiment files to keep are stored here: model file, all versions of node parameters, all versions of aggregated parameters, breakpoints. The created folder is a subdirectory of environ[EXPERIMENTS_DIR]

Parameters:

Name Type Description Default
experimentation_folder str

optionally provide an experimentation folder name. This should just contain the name of the folder, not a path. By default, if no folder name is given, an Experiment_x name is generated, where x is derived from the number of experiments already run (x=0 for the first experiment)

None

Returns:

Type Description
str

Experimentation folder

Raises:

Type Description
PermissionError

cannot create experimentation folder

OSError

cannot create experimentation folder

ValueError

bad experimentation_folder argument

Source code in fedbiomed/researcher/filetools.py
def create_exp_folder(experimentation_folder: str = None) -> str:
    """ Creates a folder for the current experiment (ie the current run of the model). Experiment files to keep
    are stored here: model file, all versions of node parameters, all versions of aggregated parameters, breakpoints.
    The created folder is a subdirectory of environ[EXPERIMENTS_DIR]

    Args:
        experimentation_folder (str, optional): optionally provide an experimentation
            folder name. This should just contain the name of the folder not a path.
            By default, if no folder name is given, generate an `Experiment_x` name where `x`
            starts at the number of entries found in environ[EXPERIMENTS_DIR] (`x`=0 for the
            first experiment) and is increased until the name is not used yet.

    Returns:
        Experimentation folder

    Raises:
        PermissionError: cannot create experimentation folder
        OSError: cannot create experimentation folder
        ValueError: bad `experimentation_folder` argument
    """
    experiments_dir = environ['EXPERIMENTS_DIR']

    if not os.path.isdir(experiments_dir):
        try:
            os.makedirs(experiments_dir, exist_ok=True)
        except (PermissionError, OSError) as err:
            logger.error("Can not save experiment files because " +
                         f"{experiments_dir} folder could not be created due to {err}")
            raise

    if not experimentation_folder:
        # no name given for the experiment folder: choose one.
        # Start from the number of entries in the directory, then skip names that are
        # already taken (e.g. after a folder was removed or a custom-named folder was added).
        # Otherwise `exist_ok=True` below would silently reuse another experiment's folder.
        index = len(os.listdir(experiments_dir))
        experimentation_folder = "Experiment_" + "{:04d}".format(index)
        while os.path.lexists(os.path.join(experiments_dir, experimentation_folder)):
            index += 1
            experimentation_folder = "Experiment_" + "{:04d}".format(index)
    elif os.path.basename(experimentation_folder) != experimentation_folder:
        # experimentation folder cannot be a path
        raise ValueError(f"Bad experimentation folder {experimentation_folder} - " +
                         "it cannot be a path")

    try:
        # a user-provided folder name may already exist: this is accepted
        os.makedirs(os.path.join(experiments_dir, experimentation_folder),
                    exist_ok=True)
    except (PermissionError, OSError) as err:
        logger.error("Can not save experiment files because " +
                     f"{experiments_dir}/{experimentation_folder} " +
                     f"folder could not be created due to {err}")
        raise

    return experimentation_folder

Create a symbolic link in breakpoint_folder_path with a non-existing name derived from basename of file_path. The symbolic link points to the real file targeted by file_path

Parameters:

Name Type Description Default
breakpoint_folder_path str

directory for the source link

required
file_path str

path to the target of the link

required

Returns:

Type Description
str

Path of the created link

Raises:

Type Description
ValueError

bad name for link source or destination

Source code in fedbiomed/researcher/filetools.py
def create_unique_file_link(breakpoint_folder_path: str, file_path: str) -> str:
    """
    Create a symbolic link in `breakpoint_folder_path` with a non-existing name derived from basename of
    `file_path`. The symbolic link points to the real file targeted by `file_path`

    Args:
        breakpoint_folder_path: directory for the source link
        file_path: path to the target of the link

    Returns:
        Path of the created link

    Raises:
        ValueError: bad name for link source or destination
    """

    try:
        real_file_path = os.path.realpath(file_path)
        real_bkpt_folder_path = os.path.realpath(breakpoint_folder_path)
        # give the error a reason, so that the log message below is informative
        if not os.path.isdir(real_bkpt_folder_path):
            raise ValueError(f'{breakpoint_folder_path} is not an existing directory')
        if not os.path.isdir(os.path.dirname(real_file_path)):
            raise ValueError(f'parent directory of {file_path} is not an existing directory')

        # - use relative path for link target for portability
        # - link to the real file, not to a link-to-the-file
        link_target = os.path.relpath(real_file_path, start=real_bkpt_folder_path)
    except ValueError as err:
        mess = 'Saving breakpoint error, ' + \
            f'cannot get relative path to {file_path} from {breakpoint_folder_path}, ' + \
            f'due to error {err}'
        logger.error(mess)
        raise

    # heuristic : assume limited set of characters in filename postfix
    # group 1 is the name without its extension, group 2 is the extension (dot included).
    # Raw string: `\.` is not a valid escape sequence in a regular string literal.
    name_match = re.search(r"(.+)(\.[a-zA-Z]+)$", os.path.basename(file_path))
    if not name_match:
        # prefix and postfix come from the same match, so both are missing here
        error_message = f'Saving breakpoint error, bad filename {file_path} gives ' + \
            f'prefix {name_match} and postfix {name_match}'
        logger.error(error_message)
        raise ValueError(error_message)

    link_src_prefix, link_src_postfix = name_match.group(1, 2)

    return create_unique_link(breakpoint_folder_path,
                              link_src_prefix, link_src_postfix,
                              link_target)

Find a non-existing name in breakpoint_folder_path and create a symbolic link to a given target name.

Parameters:

Name Type Description Default
breakpoint_folder_path str

directory for the source link

required
link_src_prefix str

beginning of the name for the source link (before unique id)

required
link_src_postfix str

end of the name for the source link (after unique id)

required
link_target_path str

target for the symbolic link

required

Returns:

Type Description
str

Path of the created link

Raises:

Type Description
PermissionError

cannot create symlink

OSError

cannot create symlink

FileExistsError

cannot create symlink

FileNotFoundError

non-existent directory

Source code in fedbiomed/researcher/filetools.py
def create_unique_link(breakpoint_folder_path: str,
                       link_src_prefix: str,
                       link_src_postfix: str,
                       link_target_path: str) -> str:
    """ Pick a name that is not used yet in `breakpoint_folder_path` and create a symbolic link with that
    name, pointing to the given target.

    The first candidate is `<prefix><postfix>`; if it is taken, `<prefix>_01<postfix>`, `<prefix>_02<postfix>`,
    ... are tried in turn.

    Args:
        breakpoint_folder_path: directory for the source link
        link_src_prefix: beginning of the name for the source link (before unique id)
        link_src_postfix: end of the name for the source link (after unique id)
        link_target_path: target for the symbolic link

    Returns:
        Path of the created link

    Raises:
        PermissionError: cannot create symlink
        OSError: cannot create symlink
        FileExistsError: cannot create symlink
        FileNotFoundError : non-existent directory
    """
    attempt = 0
    candidate_name = link_src_prefix + link_src_postfix

    # Need to ensure unique name for link (e.g. when replaying from non-last breakpoint)
    while True:
        link_src_path = os.path.join(breakpoint_folder_path, candidate_name)
        # a dangling symbolic link also makes the name unavailable
        if not os.path.lexists(link_src_path):
            break
        attempt += 1
        candidate_name = link_src_prefix + '_' + format(attempt, "02") + link_src_postfix

    try:
        os.symlink(link_target_path, link_src_path)
    except OSError as err:
        # FileExistsError, PermissionError and FileNotFoundError are all OSError subclasses
        logger.error(f"Can not create link to experiment file {link_target_path} "
                     f"from {link_src_path} due to error {err}")
        raise

    return link_src_path

find_breakpoint_path(breakpoint_folder_path=None)

Finds breakpoint folder path and file, depending on if user specifies a specific breakpoint path (unchanged in this case) or not (try to guess the latest breakpoint).

Parameters:

Name Type Description Default
breakpoint_folder_path str

path towards breakpoint. If None, (default), consider the latest breakpoint saved on default path (provided at least one breakpoint exists). Defaults to None.

None

Returns:

Type Description
Tuple[str, str]

A tuple of length two that contains, respectively:

  • path to breakpoint folder (unchanged if specified by user)
  • breakpoint file.

Raises:

Type Description
FileNotFoundError

triggered either if breakpoint cannot be found, folder is empty or file cannot be parsed

Source code in fedbiomed/researcher/filetools.py
def find_breakpoint_path(breakpoint_folder_path: str = None) -> Tuple[str, str]:
    """ Finds breakpoint folder path and file, depending on if user specifies a specific breakpoint path (unchanged in
    this case) or not (try to guess the latest breakpoint).

    Args:
        breakpoint_folder_path: path towards breakpoint. If None, (default), consider the latest breakpoint saved on
            default path (provided at least one breakpoint exists). Defaults to None.

    Returns:
        A tuple of length two that contains, respectively:

            - path to breakpoint folder (unchanged if specified by user)
            - breakpoint file (name of the `breakpoint_xx.json` state file found in that folder).

    Raises:
        FileNotFoundError: triggered either if breakpoint cannot be found, folder is empty or file cannot be parsed
    """

    # First, let's test if folder is a real folder path
    if breakpoint_folder_path is None:
        try:
            # retrieve latest experiment

            # placeholder, only used in the error message if the lookup below fails early
            latest_exp_folder = environ['EXPERIMENTS_DIR'] + "/NO_FOLDER_FOUND"

            # content of the experiments folder
            experiment_folders = os.listdir(environ['EXPERIMENTS_DIR'])

            latest_exp_folder = _get_latest_file(
                environ['EXPERIMENTS_DIR'],
                experiment_folders,
                only_folder=True)

            latest_exp_folder = os.path.join(environ['EXPERIMENTS_DIR'],
                                             latest_exp_folder)

            # content of the latest experiment folder
            bkpt_folders = os.listdir(latest_exp_folder)

            breakpoint_folder_path = _get_latest_file(
                latest_exp_folder,
                bkpt_folders,
                only_folder=True)
            breakpoint_folder_path = os.path.join(latest_exp_folder,
                                                  breakpoint_folder_path)
        except FileNotFoundError as err:
            logger.error("Cannot find latest breakpoint in " + latest_exp_folder +
                         " Are you sure at least one breakpoint is saved there ? " +
                         " - Error: " + str(err))
            raise
    elif not os.path.isdir(breakpoint_folder_path):
        raise FileNotFoundError(
            f"Breakpoint folder {breakpoint_folder_path} is not a directory")

    # verify the validity of the breakpoint content
    # TODO: be more robust
    all_breakpoint_materials = os.listdir(breakpoint_folder_path)
    if not all_breakpoint_materials:
        raise FileNotFoundError(f'Breakpoint folder {breakpoint_folder_path} is empty !')

    state_file = None
    for breakpoint_material in all_breakpoint_materials:
        # look for the json file containing experiment state
        # (it should be named `breakpoint_xx.json`)
        # there should be at most one - TODO: verify
        # (if there are several, the last one listed is kept)
        if re.fullmatch(r'breakpoint_\d*\.json', breakpoint_material) is not None:
            # single-line message: a backslash continuation inside the string literal
            # would put the source indentation in the logged text
            logger.debug(f"found json file containing states at {breakpoint_material}")
            state_file = breakpoint_material

    if state_file is None:
        message = "Cannot find JSON file containing " + \
            f"model state at {breakpoint_folder_path}. Aborting"
        logger.error(message)
        raise FileNotFoundError(message)

    return breakpoint_folder_path, state_file