Skip to content

mri_dataloader 🧠

Backend functions to load MRI and target data of different datasets.

This is more or less the backend of the data loading process in xai4mri.dataloader.datasets.

Authors: Simon M. Hofmann | Hannah S. Heinrichs
Years: 2023-2024

get_metadata_path 🧠

get_metadata_path(
    project_id: str,
    mri_seq: str,
    regis_mni: int | None,
    path_brain_mask: str | None,
    norm: bool,
    prune_mode: str | None,
    path_to_dataset: str | Path | None,
) -> Path

Get the path to the metadata table of a project's dataset.

Parameters:

Name Type Description Default
project_id str

project ID

required
mri_seq str

MRI sequence (e.g., 't1w')

required
regis_mni int | None

set when data was transformed to MNI space (1 or 2 mm) or None

required
path_brain_mask str | None

if used, path to the applied brain mask, else None

required
norm bool

if images were normalized

required
prune_mode str | None

if not used: None; else pruning mode: "cube" or "max".

required
path_to_dataset str | Path | None

Optional path to folder containing project data (if not in globally set cache_dir)

required

Returns:

Type Description
Path

path to the metadata table of the project dataset

Source code in src/xai4mri/dataloader/mri_dataloader.py
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
def get_metadata_path(
    project_id: str,
    mri_seq: str,
    regis_mni: int | None,
    path_brain_mask: str | None,
    norm: bool,
    prune_mode: str | None,
    path_to_dataset: str | Path | None,
) -> Path:
    """
    Get the path to the metadata table of a project's dataset.

    The file name is built from the MRI set name (see `get_mri_set_name()`) with `_metadata` appended,
    and it carries the `*.csv` extension.
    The folder is resolved by `get_mri_set_path()`.

    :param project_id: project ID
    :param mri_seq: MRI sequence (e.g., 't1w')
    :param regis_mni: set when data was transformed to MNI space (1 or 2 mm) or None
    :param path_brain_mask: if used, path to the applied brain mask, else None
    :param norm: if images were normalized
    :param prune_mode: if not used: None; else pruning mode: "cube" or "max".
    :param path_to_dataset: Optional path to folder containing project data (if not in globally set `cache_dir`)
    :return: path to the metadata table of the project dataset
    """
    mri_set_name = get_mri_set_name(
        project_id=project_id,
        mri_seq=mri_seq,
        regis_mni=regis_mni,
        # NOTE(review): only a `str` counts as "brain mask applied" here; a `Path` object is treated like None.
        # Confirm whether `Path` should be accepted as well.
        brain_masked=isinstance(path_brain_mask, str),
        norm=norm,
        prune_mode=prune_mode,
    )

    path_to_mri_set = get_mri_set_path(
        mri_set_name=f"{mri_set_name}_metadata",
        path_to_folder=path_to_dataset,
    )

    # Swap only the file extension: a plain `str.replace(".npy", ".csv")` would also rewrite
    # any ".npy" that happens to occur in a folder name of the path.
    return Path(path_to_mri_set).with_suffix(".csv")

get_mri_set_name 🧠

get_mri_set_name(
    project_id: str,
    mri_seq: str,
    regis_mni: int | None,
    brain_masked: bool,
    norm: bool,
    prune_mode: str | None,
) -> str

Construct a name for the MRI set which is/will be saved as *.pkl object.

The full name describes different pre-processing steps.

Parameters:

Name Type Description Default
project_id str

name of the project containing the data set, e.g., lemon, hcp, or other projects

required
mri_seq str

MRI sequence

required
regis_mni int | None

registered to MNI space in 1 or 2 mm resolution [int], or None for no registration

required
brain_masked bool

brain mask has been applied

required
norm bool

if data is normalized

required
prune_mode str | None

if data is pruned: None OR "cube" OR "max"

required

Returns:

Type Description
str

final name of MRI set

Source code in src/xai4mri/dataloader/mri_dataloader.py
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
def get_mri_set_name(
    project_id: str,
    mri_seq: str,
    regis_mni: int | None,
    brain_masked: bool,
    norm: bool,
    prune_mode: str | None,
) -> str:
    """
    Build the name under which an MRI set is (or will be) stored.

    The name starts with `<project_id>_<mri_seq>`, followed by one tag per applied pre-processing step:
    `-mni<res>mm` (registration), `-bm` (brain mask), `-n` (normalization),
    and `-p<first letter of prune_mode>` (pruning).

    :param project_id: name of the project containing the data set, e.g., lemon, hcp, or other projects
    :param mri_seq: MRI sequence
    :param regis_mni: registered to MNI space in 1 or 2 mm resolution [int], or None for no registration
    :param brain_masked: brain mask has been applied
    :param norm: if data is normalized
    :param prune_mode: if data is pruned: None OR "cube" OR "max"
    :return: final name of MRI set
    """
    name_parts = [f"{project_id}_", f"{mri_seq}"]

    # Append the tags in a fixed order, such that the same settings always yield the same name
    if _check_regis(regis_mni):
        name_parts.append(f"-mni{regis_mni}mm")
    if brain_masked:
        name_parts.append("-bm")
    if norm:
        name_parts.append("-n")
    if isinstance(prune_mode, str):
        name_parts.append(f"-p{prune_mode[0]}")  # only the initial letter of the pruning mode is kept

    return "".join(name_parts)

get_mri_set_path 🧠

get_mri_set_path(
    mri_set_name: str,
    path_to_folder: str | Path | None = None,
    as_npy: bool = True,
    as_zip: bool = False,
) -> str

Get the absolute path to the MRI set.

Parameters:

Name Type Description Default
mri_set_name str

Name of MRI set (constructed by get_mri_set_name()).

required
path_to_folder str | Path | None

The path where the MRI set is supposed to be located.

None
as_npy bool

True: Save as a numpy (*.npy) else as a pickle (*.pkl) object

True
as_zip bool

zipped file (*.pkl.gz; *.npz)

False

Returns:

Type Description
str

absolute path to the MRI set

Source code in src/xai4mri/dataloader/mri_dataloader.py
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
def get_mri_set_path(
    mri_set_name: str,
    path_to_folder: str | Path | None = None,
    as_npy: bool = True,
    as_zip: bool = False,
) -> str:
    """
    Get the absolute path to the MRI set.

    :param mri_set_name: Name of MRI set (constructed by `get_mri_set_name()`).
    :param path_to_folder: The path where the MRI set is supposed to be located.
    :param as_npy: True: Save as a numpy (`*.npy`) else as a pickle (`*.pkl`) object
    :param as_zip: zipped file (`*.pkl.gz`; `*.npz`)
    :return: absolute path to the MRI set
    """
    suffix = (".npz" if as_zip else ".npy") if as_npy else ".pkl.gz" if as_zip else ".pkl"
    if path_to_folder is None:  # Default: look in cache dir
        mri_set_path = Path(CACHE_DIR, mri_set_name).with_suffix(suffix)
    elif isinstance(path_to_folder, (str, Path)):
        if not Path(path_to_folder).is_dir():
            cprint(string=f"Note: the folder '{path_to_folder}' does not exist.", col="y")
        mri_set_path = Path(path_to_folder, mri_set_name).with_suffix(suffix)
    else:
        raise ValueError("path_to_folder must be path to folder [str|Path] or None.")
    return str(mri_set_path)

get_nifti 🧠

get_nifti(
    mri_path: str | Path, reorient: bool
) -> Nifti1Image

Get NIfTI image from its file path.

This works for both NIfTI [*.nii | *.nii.gz] and MGH [*.mgh | *.mgz] files.

Parameters:

Name Type Description Default
mri_path str | Path

path to an MRI file

required
reorient bool

reorient the image to the global project orientation space

required

Returns:

Type Description
Nifti1Image

nibabel Nifti1Image object

Source code in src/xai4mri/dataloader/mri_dataloader.py
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
def get_nifti(mri_path: str | Path, reorient: bool) -> nib.nifti1.Nifti1Image:
    """
    Load an MRI file and return it as a NIfTI image.

    Both NIfTI [`*.nii` | `*.nii.gz`] and MGH [`*.mgh` | `*.mgz`] files are supported;
    MGH images are converted via `mgz2nifti()`.

    :param mri_path: path to an MRI file
    :param reorient: if True, bring the image to the global project orientation space
    :return: nibabel Nifti1Image object
    """
    loaded_img = nib.load(mri_path)

    # Convert MGH images, such that the steps below always deal with a NIfTI image
    is_mgh = isinstance(loaded_img, nib.freesurfer.mghformat.MGHImage)
    img = mgz2nifti(loaded_img) if is_mgh else loaded_img

    return file_to_ref_orientation(image_file=img) if reorient else img

load_file_paths_from_metadata 🧠

load_file_paths_from_metadata(
    sid_list: list[str] | ndarray[str],
    path_to_metadata: str | Path,
    exist_check: bool = True,
) -> tuple[
    ndarray[Any, dtype[str | Path]],
    ndarray[Any, dtype[str]],
]

Load file paths to MRI data from a project's metadata table.

Parameters:

Name Type Description Default
sid_list list[str] | ndarray[str]

List of subject IDs.

required
path_to_metadata str | Path

Path to the metadata table of a project dataset.

required
exist_check bool

Check if image files exist.

True

Returns:

Type Description
tuple[ndarray[Any, dtype[str | Path]], ndarray[Any, dtype[str]]]

Array of MRI file paths and ordered list of corresponding subject IDs.

Source code in src/xai4mri/dataloader/mri_dataloader.py
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
def load_file_paths_from_metadata(
    sid_list: list[str] | np.ndarray[str], path_to_metadata: str | Path, exist_check: bool = True
) -> tuple[np.ndarray[Any, np.dtype[str | Path]], np.ndarray[Any, np.dtype[str]]]:
    """
    Look up the paths to the MRI files of the given subjects in a project's metadata table.

    Subject IDs that are not in the metadata table (after rows with missing values were dropped)
    are reported and left out of the result.

    :param sid_list: List of subject IDs.
    :param path_to_metadata: Path to the metadata table of a project dataset.
    :param exist_check: Check if image files exist.
    :return: Array of MRI file paths and the correspondingly ordered array of subject IDs.
    :raises FileNotFoundError: if `exist_check` is set and at least one of the files does not exist
    """
    table, path_col = _prepare_metadata(path_to_metadata=path_to_metadata)
    table = table.dropna()  # rows with missing values stem from a not fully processed dataset

    # Keep only those SIDs which have an entry in the table
    missing_sids = set(sid_list).difference(table.index)
    if missing_sids:
        cprint(
            string=f"Metadata table does not contain all requested SIDs, missing: {missing_sids}.\n"
            f"Return only SIDs data which are also in the metadata table.",
            col="y",
        )
        sid_list = [sid for sid in sid_list if sid not in missing_sids]

    mri_paths = table.loc[sid_list, path_col].to_numpy()

    if exist_check:
        for mri_path in mri_paths:
            if not Path(mri_path).is_file():
                msg = f"Not all requested files exist in: '{path_to_metadata}' (column: ['{path_col}'])!"
                raise FileNotFoundError(msg)

    return mri_paths, np.array(sid_list)

load_files_from_metadata 🧠

load_files_from_metadata(
    sid_list: list[str] | ndarray[str],
    path_to_metadata: str | Path,
) -> tuple[
    ndarray[Any, dtype[uint8 | float32]],
    ndarray[Any, dtype[str]],
]

Load MRI data from a project's metadata table.

Parameters:

Name Type Description Default
sid_list list[str] | ndarray[str]

List of subject IDs.

required
path_to_metadata str | Path

Path to the metadata table of a project dataset.

required

Returns:

Type Description
tuple[ndarray[Any, dtype[uint8 | float32]], ndarray[Any, dtype[str]]]

Array of MRI files with the shape [n_subjects, x, y, z, 1], and an ordered list of corresponding subject IDs.

Source code in src/xai4mri/dataloader/mri_dataloader.py
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
def load_files_from_metadata(
    sid_list: list[str] | np.ndarray[str], path_to_metadata: str | Path
) -> tuple[np.ndarray[Any, np.dtype[np.uint8 | np.float32]], np.ndarray[Any, np.dtype[str]]]:
    """
    Load MRI data from a project's metadata table.

    Subject IDs that are not in the metadata table (after rows with missing values were dropped)
    are reported and left out of the result.
    Whether the images get normalized is read from the `-n` tag in the name of the metadata file
    (see `get_mri_set_name()`); clipping is applied if the table has `clip` columns.

    :param sid_list: List of subject IDs.
    :param path_to_metadata: Path to the metadata table of a project dataset.
    :return: Array of MRI files with the shape `[n_subjects, x, y, z, 1]`,
             and an ordered list of corresponding subject IDs.
    :raises ValueError: if there is no subject left to load (empty `sid_list`, or no SID is in the table)
    """
    # Load metadata
    metadata_table, processed_col = _prepare_metadata(path_to_metadata=path_to_metadata)
    compress = any("clip" in c for c in metadata_table.columns)

    # The file name has the form '<project_id>_<mri_seq>[-mni?mm][-bm][-n][-p?]_metadata.csv'.
    # Check the '-'-separated tags after the MRI sequence: a plain substring test for "-n-" misses
    # normalized but unpruned sets ('..-n_metadata.csv') and wrongly matches project IDs containing "-n-".
    file_name = Path(path_to_metadata).name
    set_tags = file_name.split("_metadata")[0].rsplit("_", maxsplit=1)[-1].split("-")[1:]
    norm = "n" in set_tags

    metadata_table = metadata_table.dropna()  # this cleans up the metadata table, when it has not been fully processed

    # Check if all SIDs in metadata table
    sids_not_in_metadata = set(sid_list).difference(metadata_table.index)
    if len(sids_not_in_metadata) > 0:
        cprint(
            string=f"Metadata table does not contain all given SIDs, missing: {sids_not_in_metadata}.\n"
            f"Return only SIDs data which are also in the metadata table.",
            col="y",
        )
        sid_list = [sid for sid in sid_list if sid not in sids_not_in_metadata]

    # Without any subject, the image shape is unknown and no array can be built: fail early and clearly
    if len(sid_list) == 0:
        msg = (
            f"No MRI data to load: 'sid_list' is empty or none of its SIDs are in the metadata table "
            f"'{path_to_metadata}'."
        )
        raise ValueError(msg)

    # Load data
    all_mri = None
    for sid_idx, sid in tqdm(
        enumerate(sid_list),
        total=len(sid_list),
        desc=f"Loading MRI data for {file_name.split('_metadata.csv')[0]}",
    ):
        path_to_mri = Path(metadata_table.loc[sid, processed_col])
        single_data4d = _load_obj(name=path_to_mri.name, folder=path_to_mri.parent, functimer=False)
        single_data4d = np.expand_dims(single_data4d, axis=0)

        if all_mri is None:
            # The first image defines the spatial shape of the whole data set (to be filled)
            all_mri = np.empty(shape=(len(sid_list), *single_data4d.shape[1:4]), dtype=np.float32)
        all_mri[sid_idx, :, :, :] = single_data4d

    # Clip, normalize and, or compress all images
    clip_min = np.nanmean(metadata_table[f"{processed_col}_min_clip"]) if compress else None
    clip_max = np.nanmean(metadata_table[f"{processed_col}_max_clip"]) if compress else None
    all_mri = compress_and_norm(data=all_mri, clip_min=clip_min, clip_max=clip_max, norm=norm)

    # expand to empty dimension (batch_size, x, y, z, channel=1)
    all_mri = np.expand_dims(all_mri, axis=4)

    return all_mri, np.array(sid_list)

mgz2nifti 🧠

mgz2nifti(nib_mgh: MGHImage) -> Nifti1Image

Convert Freesurfer's MGH-NMR [*.mgh | *.mgz] file to NIfTI [*.nii].

Parameters:

Name Type Description Default
nib_mgh MGHImage

nibabel MGHImage object

required

Returns:

Type Description
Nifti1Image

nibabel Nifti1Image object

Source code in src/xai4mri/dataloader/mri_dataloader.py
448
449
450
451
452
453
454
455
def mgz2nifti(nib_mgh: nib.freesurfer.mghformat.MGHImage) -> nib.nifti1.Nifti1Image:
    """
    Turn a Freesurfer MGH-NMR image [`*.mgh` | `*.mgz`] into a NIfTI image [`*.nii`].

    The new image is built from the image data and the affine of the given MGH image.

    :param nib_mgh: `nibabel` `MGHImage` object
    :return: `nibabel` `Nifti1Image` object
    """
    voxel_data = nib_mgh.get_fdata(caching="unchanged")
    return nib.Nifti1Image(dataobj=voxel_data, affine=nib_mgh.affine)

process_single_mri 🧠

process_single_mri(
    mri_path: str | Path,
    dtype: type = np.float32,
    prune_mode: str | None = "max",
    path_brain_mask: str | Path | None = None,
    regis_mni: int | None = None,
    path_cached_mni: str | Path | None = None,
    verbose: bool = False,
) -> ndarray

Load an individual MRI of an individual subject as a numpy array.

Parameters:

Name Type Description Default
mri_path str | Path

path to the original NIfTI MRI file

required
dtype type

data type of returned MRI (default: np.float32)

float32
prune_mode str | None

if not used: None; image pruning reduces zero-padding around the brain: "cube": all axes have the same length; "max": maximally prune all axes independently

'max'
path_brain_mask str | Path | None

path to the brain mask; if no mask should be applied use None

None
regis_mni int | None

transform MRI to MNI space in 1 or 2 mm resolution [int], or None for no registration

None
verbose bool

be verbose about the process or not

False
path_cached_mni str | Path | None

if a path is provided, save interim file in MNI space to this cache path

None

Returns:

Type Description
ndarray

4D numpy array (MRI) of shape [(empty), x, y, z]

Source code in src/xai4mri/dataloader/mri_dataloader.py
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
def process_single_mri(
    mri_path: str | Path,
    dtype: type = np.float32,
    prune_mode: str | None = "max",
    path_brain_mask: str | Path | None = None,
    regis_mni: int | None = None,
    path_cached_mni: str | Path | None = None,
    verbose: bool = False,
) -> np.ndarray:
    """
    Process the MRI of a single subject and return it as a numpy array.

    The steps are run in this order: load the image, register it to MNI space (optional),
    reorient it to the global project orientation space, prune it (optional),
    and apply a brain mask (optional).

    :param mri_path: path to the original NIfTI MRI file
    :param dtype: data type of returned MRI (default: `np.float32`)
    :param prune_mode: if not used: `None`; image pruning reduces zero-padding around the brain:
                       "cube": all axes have the same length; "max": maximally prune all axes independently
    :param path_brain_mask: path to the brain mask; if no mask should be applied use `None`
    :param regis_mni: transform MRI to MNI space in 1 or 2 mm resolution [int], or `None` for no registration
    :param path_cached_mni: if a path is provided, save interim file in MNI space to this cache path
    :param verbose: be verbose about the process or not
    :return: 4D numpy array (MRI) of shape `[(empty), x, y, z]`
    :raises IndexError: re-raised from the pruning step, after the affected file has been reported
    """
    # The orientation is set after the (optional) registration, hence no reorienting at this point
    img = get_nifti(mri_path=mri_path, reorient=False)

    # Resample to MNI space with 1 or 2 mm resolution
    if _check_regis(regis_mni):
        transform_type = "Rigid" if is_mni(img=img) else "SyN"
        img = register_to_mni(
            moving_mri=img,
            resolution=regis_mni,
            save_path_mni=path_cached_mni,
            type_of_transform=transform_type,
            verbose=verbose,
        )

    # Bring the image to the global project orientation space, then extract its data
    img = file_to_ref_orientation(image_file=img)
    volume = img.get_fdata(dtype=dtype, caching="unchanged")

    # Prune the image, i.e., minimize the zero-padding
    if isinstance(prune_mode, str):
        mode = prune_mode.lower()
        max_axes = get_global_max_axes(nifti_img=img, per_axis=mode == "max")
        try:
            volume = prune_mri(x3d=volume, make_cube=mode == "cube", max_axis=max_axes)
        except IndexError as prune_error:
            cprint(string=f"\nCould not prune MRI: {mri_path}\n", col="r")
            raise prune_error

    # Apply the brain mask
    if isinstance(path_brain_mask, (str, Path)):
        mask = nib.load(path_brain_mask).get_fdata(caching="unchanged")  # could pass dtype=np.uint8
        volume = apply_mask(data=volume, mask=mask)

    # Add a leading empty axis: (1, x, y, z)
    return np.expand_dims(volume, axis=0)