mri_dataloader 🧠

Backend functions to load MRI and target data of different datasets.

This is more or less the backend of the data loading process in xai4mri.dataloader.datasets.

Authors: Simon M. Hofmann | Hannah S. Heinrichs
Years: 2023-2024

get_metadata_path 🧠

get_metadata_path(
    project_id: str,
    mri_seq: str,
    regis_mni: int | None,
    path_brain_mask: str | None,
    norm: bool,
    prune_mode: str | None,
    path_to_dataset: str | Path | None,
) -> Path

Get the path to the metadata table of a project's dataset.

Parameters:

Name	Type	Description	Default
`project_id`	`str`	project ID	required
`mri_seq`	`str`	MRI sequence (e.g., 't1w')	required
`regis_mni`	`int \| None`	set when data was transformed to MNI space (1 or 2 mm) or None	required
`path_brain_mask`	`str \| None`	if used, path to the applied brain mask, else None	required
`norm`	`bool`	if images were normalized	required
`prune_mode`	`str \| None`	if not used: None; else pruning mode: "cube" or "max".	required
`path_to_dataset`	`str \| Path \| None`	Optional path to folder containing project data (if not in globally set `cache_dir`)	required

Returns:

Type	Description
`Path`	path to the metadata table of the project dataset

Source code in src/xai4mri/dataloader/mri_dataloader.py

def get_metadata_path(
    project_id: str,
    mri_seq: str,
    regis_mni: int | None,
    path_brain_mask: str | Path | None,
    norm: bool,
    prune_mode: str | None,
    path_to_dataset: str | Path | None,
) -> Path:
    """
    Get the path to the metadata table of a project's dataset.

    The table shares the name of the MRI set (see `get_mri_set_name()`), extended by `"_metadata"`,
    and is stored as a `.csv` file in the folder that `get_mri_set_path()` resolves.

    :param project_id: project ID
    :param mri_seq: MRI sequence (e.g., 't1w')
    :param regis_mni: set when data was transformed to MNI space (1 or 2 mm) or None
    :param path_brain_mask: if used, path to the applied brain mask [str | Path], else None
    :param norm: if images were normalized
    :param prune_mode: if not used: None; else pruning mode: "cube" or "max".
    :param path_to_dataset: Optional path to folder containing project data (if not in globally set `cache_dir`)
    :return: path to the metadata table of the project dataset
    """
    mri_set_name = get_mri_set_name(
        project_id=project_id,
        mri_seq=mri_seq,
        regis_mni=regis_mni,
        # Only the presence of a mask is encoded in the name; a mask given as `Path` counts as well
        brain_masked=isinstance(path_brain_mask, (str, Path)),
        norm=norm,
        prune_mode=prune_mode,
    )

    mri_set_path = get_mri_set_path(
        mri_set_name=mri_set_name + "_metadata",
        path_to_folder=path_to_dataset,
    )

    # Swap the file suffix only: a plain string replacement would also rewrite any ".npy" within folder names
    return Path(mri_set_path).with_suffix(".csv")

get_mri_set_name 🧠

get_mri_set_name(
    project_id: str,
    mri_seq: str,
    regis_mni: int | None,
    brain_masked: bool,
    norm: bool,
    prune_mode: str | None,
) -> str

Construct a name for the MRI set, which is/will be saved as a *.npy or *.pkl object (optionally zipped).

The full name describes different pre-processing steps.

Parameters:

Name	Type	Description	Default
`project_id`	`str`	name of the project containing the data set, e.g., lemon, hcp, or other projects	required
`mri_seq`	`str`	MRI sequence	required
`regis_mni`	`int \| None`	registered to MNI space in 1 or 2 mm resolution [int], or None for no registration	required
`brain_masked`	`bool`	brain mask has been applied	required
`norm`	`bool`	if data is normalized	required
`prune_mode`	`str \| None`	if data is pruned: None OR "cube" OR "max"	required

Returns:

Type	Description
`str`	final name of MRI set

Source code in src/xai4mri/dataloader/mri_dataloader.py

def get_mri_set_name(
    project_id: str,
    mri_seq: str,
    regis_mni: int | None,
    brain_masked: bool,
    norm: bool,
    prune_mode: str | None,
) -> str:
    """
    Construct the name under which an MRI set is/will be saved.

    The name starts with `<project_id>_<mri_seq>`, and one flag is appended per applied pre-processing step:
    `-mni<resolution>mm` (registration), `-bm` (brain mask), `-n` (normalization),
    and `-p<first letter of prune_mode>` (pruning).

    :param project_id: name of the project containing the data set, e.g., lemon, hcp, or other projects
    :param mri_seq: MRI sequence
    :param regis_mni: registered to MNI space in 1 or 2 mm resolution [int], or None for no registration
    :param brain_masked: brain mask has been applied
    :param norm: if data is normalized
    :param prune_mode: if data is pruned: None OR "cube" OR "max"
    :return: final name of MRI set
    """
    name_parts = [f"{project_id}_", f"{mri_seq}"]

    # Append the flags in a fixed order, such that the same processing always results in the same name
    if _check_regis(regis_mni):
        name_parts.append(f"-mni{regis_mni}mm")
    if brain_masked:
        name_parts.append("-bm")
    if norm:
        name_parts.append("-n")
    if isinstance(prune_mode, str):
        name_parts.append("-p" + f"{prune_mode[0]}")

    return "".join(name_parts)

get_mri_set_path 🧠

get_mri_set_path(
    mri_set_name: str,
    path_to_folder: str | Path | None = None,
    as_npy: bool = True,
    as_zip: bool = False,
) -> str

Get the absolute path to the MRI set.

Parameters:

Name	Type	Description	Default
`mri_set_name`	`str`	Name of MRI set (constructed by `get_mri_set_name()`).	required
`path_to_folder`	`str \| Path \| None`	The path where the MRI set is supposed to be located.	`None`
`as_npy`	`bool`	True: Save as a numpy (`.npy`) else as a pickle (`.pkl`) object	`True`
`as_zip`	`bool`	zipped file (`.pkl.gz`; `.npz`)	`False`

Returns:

Type	Description
`str`	absolute path to the MRI set

Source code in src/xai4mri/dataloader/mri_dataloader.py

def get_mri_set_path(
    mri_set_name: str,
    path_to_folder: str | Path | None = None,
    as_npy: bool = True,
    as_zip: bool = False,
) -> str:
    """
    Get the absolute path to the MRI set.

    :param mri_set_name: Name of MRI set (constructed by `get_mri_set_name()`).
    :param path_to_folder: The path where the MRI set is supposed to be located.
    :param as_npy: True: Save as a numpy (`*.npy`) else as a pickle (`*.pkl`) object
    :param as_zip: zipped file (`*.pkl.gz`; `*.npz`)
    :return: absolute path to the MRI set
    """
    suffix = (".npz" if as_zip else ".npy") if as_npy else ".pkl.gz" if as_zip else ".pkl"
    if path_to_folder is None:  # Default: look in cache dir
        mri_set_path = Path(CACHE_DIR, mri_set_name).with_suffix(suffix)
    elif isinstance(path_to_folder, (str, Path)):
        if not Path(path_to_folder).is_dir():
            cprint(string=f"Note: the folder '{path_to_folder}' does not exist.", col="y")
        mri_set_path = Path(path_to_folder, mri_set_name).with_suffix(suffix)
    else:
        raise ValueError("path_to_folder must be path to folder [str|Path] or None.")
    return str(mri_set_path)

get_nifti 🧠

get_nifti(
    mri_path: str | Path, reorient: bool
) -> Nifti1Image

Get NIfTI image from its file path.

This works for both NIfTI [*.nii | *.nii.gz] and MGH [*.mgh | *.mgz] files.

Parameters:

Name	Type	Description	Default
`mri_path`	`str \| Path`	path to an MRI file	required
`reorient`	`bool`	reorient the image to the global project orientation space	required

Returns:

Type	Description
`Nifti1Image`	nibabel Nifti1Image object

Source code in src/xai4mri/dataloader/mri_dataloader.py

def get_nifti(mri_path: str | Path, reorient: bool) -> nib.nifti1.Nifti1Image:
    """
    Load an MRI file and return it as NIfTI image.

    Both NIfTI [`*.nii` | `*.nii.gz`] and MGH [`*.mgh` | `*.mgz`] files are supported;
    MGH images are converted via `mgz2nifti()`.

    :param mri_path: path to an MRI file
    :param reorient: reorient the image to the global project orientation space
    :return: nibabel Nifti1Image object
    """
    image = nib.load(mri_path)

    # MGH images are converted, such that the remaining pipeline handles NIfTI images only
    is_mgh = isinstance(image, nib.freesurfer.mghformat.MGHImage)
    if is_mgh:
        image = mgz2nifti(image)

    return file_to_ref_orientation(image_file=image) if reorient else image

load_file_paths_from_metadata 🧠

load_file_paths_from_metadata(
    sid_list: list[str] | ndarray[str],
    path_to_metadata: str | Path,
    exist_check: bool = True,
) -> tuple[
    ndarray[Any, dtype[str | Path]],
    ndarray[Any, dtype[str]],
]

Load file paths to MRI data from a project's metadata table.

Parameters:

Name	Type	Description	Default
`sid_list`	`list[str] \| ndarray[str]`	List of subject ID's.	required
`path_to_metadata`	`str \| Path`	Path to the metadata table of a project dataset.	required
`exist_check`	`bool`	Check if image files exist.	`True`

Returns:

Type	Description
`tuple[ndarray[Any, dtype[str \| Path]], ndarray[Any, dtype[str]]]`	Array of MRI file paths and ordered list of corresponding subject ID's.

Source code in src/xai4mri/dataloader/mri_dataloader.py

def load_file_paths_from_metadata(
    sid_list: list[str] | np.ndarray[str], path_to_metadata: str | Path, exist_check: bool = True
) -> tuple[np.ndarray[Any, np.dtype[str | Path]], np.ndarray[Any, np.dtype[str]]]:
    """
    Look up the paths to the processed MRI files of the given subjects in a project's metadata table.

    Subject IDs that are not in the (cleaned) metadata table are reported and skipped.

    :param sid_list: List of subject IDs.
    :param path_to_metadata: Path to the metadata table of a project dataset.
    :param exist_check: Check if image files exist.
    :return: Array of MRI file paths and ordered list of corresponding subject IDs.
    :raises FileNotFoundError: if `exist_check` is set and at least one listed file does not exist
    """
    table, path_col = _prepare_metadata(path_to_metadata=path_to_metadata)
    table = table.dropna()  # drop incomplete rows, which occur when the table has not been fully processed

    # Restrict the request to subjects, which are listed in the table
    missing_sids = set(sid_list).difference(table.index)
    if missing_sids:
        cprint(
            string=f"Metadata table does not contain all requested SIDs, missing: {missing_sids}.\n"
            f"Return only SIDs data which are also in the metadata table.",
            col="y",
        )
        sid_list = [sid for sid in sid_list if sid not in missing_sids]

    mri_paths = table.loc[sid_list, path_col].to_numpy()

    if exist_check:
        for mri_path in mri_paths:
            if not Path(mri_path).is_file():  # stop at the first missing file
                msg = f"Not all requested files exist in: '{path_to_metadata}' (column: ['{path_col}'])!"
                raise FileNotFoundError(msg)

    return mri_paths, np.array(sid_list)

load_files_from_metadata 🧠

load_files_from_metadata(
    sid_list: list[str] | ndarray[str],
    path_to_metadata: str | Path,
) -> tuple[
    ndarray[Any, dtype[uint8 | float32]],
    ndarray[Any, dtype[str]],
]

Load MRI data from a project's metadata table.

Parameters:

Name	Type	Description	Default
`sid_list`	`list[str] \| ndarray[str]`	List of subject ID's.	required
`path_to_metadata`	`str \| Path`	Path to the metadata table of a project dataset.	required

Returns:

Type	Description
`tuple[ndarray[Any, dtype[uint8 \| float32]], ndarray[Any, dtype[str]]]`	Array of MRI files with the shape `[n_subjects, x, y, z, 1]`, and an ordered list of corresponding subject ID's.

Source code in src/xai4mri/dataloader/mri_dataloader.py

def load_files_from_metadata(
    sid_list: list[str] | np.ndarray[str], path_to_metadata: str | Path
) -> tuple[np.ndarray[Any, np.dtype[np.uint8 | np.float32]], np.ndarray[Any, np.dtype[str]]]:
    """
    Load MRI data from a project's metadata table.

    Whether images get normalized is inferred from the `-n` flag in the file name of the metadata table
    (see `get_mri_set_name()`); clipping is applied if the table contains clip columns.
    Subject IDs that are not in the (cleaned) metadata table are reported and skipped.

    :param sid_list: List of subject IDs.
    :param path_to_metadata: Path to the metadata table of a project dataset.
    :return: Array of MRI files with the shape `[n_subjects, x, y, z, 1]`,
             and an ordered list of corresponding subject IDs.
    :raises ValueError: if none of the requested subject IDs is available in the metadata table
    """
    metadata_name = Path(path_to_metadata).name
    mri_set_name = metadata_name.split("_metadata.csv")[0]

    # Load metadata
    metadata_table, processed_col = _prepare_metadata(path_to_metadata=path_to_metadata)
    compress = any("clip" in c for c in metadata_table.columns)
    # The norm flag is followed by the prune flag ("-n-p..."), or it ends the set name if data was not pruned
    norm = "-n-" in metadata_name or mri_set_name.endswith("-n")
    metadata_table = metadata_table.dropna()  # this cleans up the metadata table, when it has not been fully processed

    # Check if all SIDs in metadata table
    sids_not_in_metadata = set(sid_list).difference(metadata_table.index)
    if sids_not_in_metadata:
        cprint(
            string=f"Metadata table does not contain all given SIDs, missing: {sids_not_in_metadata}.\n"
            f"Return only SIDs data which are also in the metadata table.",
            col="y",
        )
        sid_list = [sid for sid in sid_list if sid not in sids_not_in_metadata]

    if len(sid_list) == 0:
        # Without this guard, the data array would never be initialized below
        msg = f"None of the requested SIDs has processed MRI data listed in '{path_to_metadata}'."
        raise ValueError(msg)

    # Load data
    all_mri = None
    for sid_idx, sid in tqdm(
        enumerate(sid_list),
        total=len(sid_list),
        desc=f"Loading MRI data for {mri_set_name}",
    ):
        path_to_mri = Path(metadata_table.loc[sid, processed_col])
        single_data4d = _load_obj(name=path_to_mri.name, folder=path_to_mri.parent, functimer=False)
        single_data4d = np.expand_dims(single_data4d, axis=0)

        if all_mri is None:
            # The first image defines the shape of the whole data set (to be filled)
            all_mri = np.empty(
                shape=(
                    len(sid_list),
                    single_data4d.shape[1],
                    single_data4d.shape[2],
                    single_data4d.shape[3],
                ),
                dtype=np.float32,
            )
        all_mri[sid_idx, :, :, :] = single_data4d

    # Clip, normalize and, or compress all images
    clip_min = clip_max = None
    if compress:
        # One clip value for all images: the mean over the subject-wise clip values in the table
        clip_min = np.nanmean(metadata_table[f"{processed_col}_min_clip"])
        clip_max = np.nanmean(metadata_table[f"{processed_col}_max_clip"])
    all_mri = compress_and_norm(data=all_mri, clip_min=clip_min, clip_max=clip_max, norm=norm)

    # expand to empty dimension (batch_size, x, y, z, channel=1)
    all_mri = np.expand_dims(all_mri, axis=4)

    return all_mri, np.array(sid_list)

mgz2nifti 🧠

mgz2nifti(nib_mgh: MGHImage) -> Nifti1Image

Convert Freesurfer's MGH-NMR [*.mgh | *.mgz] file to NIfTI [*.nii].

Parameters:

Name	Type	Description	Default
`nib_mgh`	`MGHImage`	`nibabel` `MGHImage` object	required

Returns:

Type	Description
`Nifti1Image`	`nibabel` `Nifti1Image` object

Source code in src/xai4mri/dataloader/mri_dataloader.py

def mgz2nifti(nib_mgh: nib.freesurfer.mghformat.MGHImage) -> nib.nifti1.Nifti1Image:
    """
    Create a NIfTI image from a Freesurfer MGH-NMR [`*.mgh` | `*.mgz`] image.

    Only the image data and the affine of the MGH image are passed on to the new NIfTI image.

    :param nib_mgh: `nibabel` `MGHImage` object
    :return: `nibabel` `Nifti1Image` object
    """
    voxel_data = nib_mgh.get_fdata(caching="unchanged")
    return nib.Nifti1Image(dataobj=voxel_data, affine=nib_mgh.affine)

process_single_mri 🧠

process_single_mri(
    mri_path: str | Path,
    dtype: type = np.float32,
    prune_mode: str | None = "max",
    path_brain_mask: str | Path | None = None,
    regis_mni: int | None = None,
    path_cached_mni: str | Path | None = None,
    verbose: bool = False,
) -> ndarray

Load an individual MRI of an individual subject as a numpy array.

Parameters:

Name	Type	Description	Default
`mri_path`	`str \| Path`	path to the original NIfTI MRI file	required
`dtype`	`type`	data type of returned MRI (default: `np.float32`)	`float32`
`prune_mode`	`str \| None`	if not used: `None`; image pruning reduces zero-padding around the brain: "cube": all axes have the same length; "max": maximally prune all axes independently	`'max'`
`path_brain_mask`	`str \| Path \| None`	path to the brain mask; if no mask should be applied use `None`	`None`
`regis_mni`	`int \| None`	transform MRI to MNI space in 1 or 2 mm resolution [int], or `None` for no registration	`None`
`verbose`	`bool`	be verbose about the process or not	`False`
`path_cached_mni`	`str \| Path \| None`	if a path is provided, save interim file in MNI space to this cache path	`None`

Returns:

Type	Description
`ndarray`	4D numpy array (MRI) of shape `[(empty), x, y, z]`

Source code in src/xai4mri/dataloader/mri_dataloader.py

def process_single_mri(
    mri_path: str | Path,
    dtype: type = np.float32,
    prune_mode: str | None = "max",
    path_brain_mask: str | Path | None = None,
    regis_mni: int | None = None,
    path_cached_mni: str | Path | None = None,
    verbose: bool = False,
) -> np.ndarray:
    """
    Load and pre-process the MRI of a single subject and return it as a numpy array.

    The steps are applied in this order: optional registration to MNI space, reorientation to the
    global project orientation space, optional pruning, and optional brain masking.

    :param mri_path: path to the original NIfTI MRI file
    :param dtype: data type of returned MRI (default: `np.float32`)
    :param prune_mode: if not used: `None`; image pruning reduces zero-padding around the brain:
                       "cube": all axes have the same length; "max": maximally prune all axes independently
    :param path_brain_mask: path to the brain mask; if no mask should be applied use `None`
    :param regis_mni: transform MRI to MNI space in 1 or 2 mm resolution [int], or `None` for no registration
    :param path_cached_mni: if a path is provided, save interim file in MNI space to this cache path
    :param verbose: be verbose about the process or not
    :return: 4D numpy array (MRI) of shape `[(empty), x, y, z]`
    """
    # The image is reoriented only after the (optional) registration, hence not while loading
    image = get_nifti(mri_path=mri_path, reorient=False)

    if _check_regis(regis_mni):
        # The type of transform depends on whether the image is already in MNI space
        transform = "Rigid" if is_mni(img=image) else "SyN"
        image = register_to_mni(
            moving_mri=image,
            resolution=regis_mni,
            save_path_mni=path_cached_mni,
            type_of_transform=transform,
            verbose=verbose,
        )

    # Bring the image to the global project orientation space, then extract the image data
    image = file_to_ref_orientation(image_file=image)
    volume = image.get_fdata(dtype=dtype, caching="unchanged")

    if isinstance(prune_mode, str):
        # Pruning minimizes the zero-padding within the limits of the maximal axes lengths
        mode = prune_mode.lower()
        max_axes = get_global_max_axes(nifti_img=image, per_axis=mode == "max")
        try:
            volume = prune_mri(x3d=volume, make_cube=mode == "cube", max_axis=max_axes)
        except IndexError as e:
            # Name the affected file before passing the error on
            cprint(string=f"\nCould not prune MRI: {mri_path}\n", col="r")
            raise e

    if isinstance(path_brain_mask, (str, Path)):
        mask = nib.load(path_brain_mask).get_fdata(caching="unchanged")
        volume = apply_mask(data=volume, mask=mask)

    # Prepend an empty axis: (x, y, z) -> (1, x, y, z)
    return np.expand_dims(volume, axis=0)