computational_choice_model 🗿

Computational models of human choice behavior.

The SPoSE model is inspired by Hebart et al. (Nat. Hum. Behav., 2020), see Fig. 1c. The VICE model is taken from Muttenthaler et al. (arXiv, 2022).

VICE or SPoSE

'VICE rivals or outperforms its predecessor, SPoSE, at predicting human behavior in the odd-one-out triplet task. Furthermore, VICE's object representations are more reproducible and consistent across random initializations.' - Muttenthaler et al. (arXiv, 2022)

Other computational embedding models are in facesim3d.modeling.VGG.

Functions:

Name	Description
`display_representative_faces`	Display representative faces for the first `m` dimensions of the trained sparse model.
`extract_faces_for_spose_dimensions`	Extract the first `n` most representative faces for the first `m` dimensions of the trained SPoSE model.
`extract_faces_for_vice_dimensions`	Extract the first `n` most representative faces for the first `m` dimensions of the trained VICE model.
`plot_weight_matrix`	Plot the weight (i.e., `m`-dimensional embedding) matrix of `VICE` \| `SPoSE`.
`prepare_data_for_spose_and_vice`	Prepare data for `SPoSE` & `VICE` models.

display_representative_faces 🗿

display_representative_faces(
    face_dim_idx_mat: ndarray,
    pilot: bool = PILOT,
    as_grid: bool = True,
    title: str | None = None,
    dim_indices: list | None = None,
    save_path: str | Path | None = None,
) -> None

Display representative faces for the first m dimensions of the trained sparse model.

Parameters:

Name	Type	Description	Default
`face_dim_idx_mat`	`ndarray`	n x m matrix of indices of representative faces (rows) per dimension (columns)	required
`pilot`	`bool`	True: use pilot data	`PILOT`
`as_grid`	`bool`	plot as image grid	`True`
`title`	`str \| None`	Image or figure title	`None`
`dim_indices`	`list \| None`	list of dimension indices to display	`None`
`save_path`	`str \| Path \| None`	if the path is given, save figure	`None`

Returns:

Type	Description
`None`	None

Source code in code/facesim3d/modeling/computational_choice_model.py

def display_representative_faces(
    face_dim_idx_mat: np.ndarray,
    pilot: bool = params.PILOT,
    as_grid: bool = True,
    title: str | None = None,
    dim_indices: list | None = None,
    save_path: str | Path | None = None,
) -> None:
    """
    Display representative faces for the first `m` dimensions of the trained sparse model.

    Each column of `face_dim_idx_mat` is treated as one dimension, each row as one rank position
    (i.e., the grid has one row per face rank and one column per dimension).

    :param face_dim_idx_mat: n x m matrix of indices of representative faces (rows) per dimension (columns)
    :param pilot: True: use pilot data
    :param as_grid: True: plot all faces in one image grid; False: display each face via `display_face()`
    :param title: Image or figure title (also used as matplotlib figure identifier and file name prefix)
    :param dim_indices: list of (zero-based) dimension indices to display; they are only used for the
                        column titles and the file name. Defaults to `0 ... m-1`.
    :param save_path: if the path is given, save figure (as `.png` and `.svg`); only used if `as_grid`
    :return: None
    """
    # Choose the map for CFD indices
    idx_mapper = (
        partial(pilot_matrix_index_to_head_nr, pilot_version=params.PILOT_VERSION)
        if pilot
        else main_matrix_index_to_head_nr
    )

    n_faces, n_dims = face_dim_idx_mat.shape

    fig, axes = None, None  # init
    if as_grid:
        dim_indices = range(n_dims) if dim_indices is None else dim_indices
        fig, axes = plt.subplots(
            n_faces,
            n_dims,
            figsize=(n_dims * 2, n_faces * 2),
            sharex=True,
            sharey=True,
            num=title,
            squeeze=False,  # always return a 2D axes array, also for a single row or column
        )

    for dim_i, face_indices in enumerate(face_dim_idx_mat.T):
        print(dim_i, face_indices)

        for i, face_idx in enumerate(face_indices):
            # Map indices back to CFD index
            face_id = idx_mapper(face_idx)

            if not as_grid:
                # Display face images externally
                # TODO: add somewhere a caption  # noqa: FIX002
                display_face(head_id=face_id)
                continue

            # Display faces with the strongest weight(s) for each dimension
            try:
                face_img = plt.imread(fname=face_image_path(head_id=face_id))  # load as np.array
            except FileNotFoundError:
                # This computes the screenshot of the 3D reconstructed face
                display_face(head_id=face_id, data_mode="3d-reconstructions", interactive=False, verbose=False)
                face_img = plt.imread(fname=face_image_path(head_id=face_id))

            # Zooming takes a while; now it is done already at earlier stage in display_face()
            ax = axes[i, dim_i]
            ax.imshow(face_img)
            if i == 0:
                ax.set_title(f"Dimension {dim_indices[dim_i] + 1}")
            ax.set_xticks([])
            ax.set_xlabel(face_id)
            ax.yaxis.set_visible(False)
            for spine in ax.spines.values():  # remove axes-box around image
                spine.set_visible(False)

    if as_grid:
        fig.tight_layout()
        if save_path:
            # Convert to plain ints, so the file name does not contain NumPy scalar reprs
            dims_label = str(tuple(int(d) + 1 for d in dim_indices)).replace(" ", "")
            fn = f"{title}_representative_faces-{n_faces}_dims-{dims_label}"
            cprint(string=f"Saving figure in '{Path(str(save_path), fn)}' ... ", col="b")
            for ext in ["png", "svg"]:
                # Append the extension explicitly: Path.with_suffix() would cut the name at a dot in `title`.
                # Save via the figure handle, so the result does not depend on which figure is current.
                fig.savefig(fname=Path(save_path, f"{fn}.{ext}"), dpi=300, format=ext)
            plt.close(fig)

extract_faces_for_spose_dimensions 🗿

extract_faces_for_spose_dimensions(
    session: str,
    n_face: int | None = None,
    m_dims: int | None = None,
    pilot: bool = PILOT,
    return_path: bool = False,
    **kwargs
) -> ndarray | tuple[ndarray, str]

Extract the first n most representative faces for the first m dimensions of the trained SPoSE model.

See Hebart et al. (2020), caption of Fig. 2:

Quote

'The images reflect the objects with the highest weights along those dimensions.'

Parameters:

Name	Type	Description	Default
`session`	`str`	"2D", OR "3D"	required
`n_face`	`int \| None`	[int] restrict the number of faces OR [None] all faces are returned	`None`
`m_dims`	`int \| None`	[int] restrict the number of dimensions (or weights) in the SPoSE model OR [None] all dimensions are returned	`None`
`pilot`	`bool`	True: use pilot data	`PILOT`
`return_path`	`bool`	True: return the path to the weights-file	`False`

Returns:

Type	Description
`ndarray \| tuple[ndarray, str]`	indices of representative faces

Source code in code/facesim3d/modeling/computational_choice_model.py

def extract_faces_for_spose_dimensions(
    session: str,
    n_face: int | None = None,
    m_dims: int | None = None,
    pilot: bool = params.PILOT,
    return_path: bool = False,
    **kwargs,
) -> np.ndarray | tuple[np.ndarray, str]:
    """
    Extract the first `n` most representative faces for the first `m` dimensions of the trained SPoSE model.

    Faces are ranked per dimension by their weight, in descending order.
    See Hebart et al. (2020), caption of Fig. 2:

    !!! quote
        'The images reflect the objects with the highest weights along those dimensions.'

    :param session: "2D", OR "3D"
    :param n_face: [int] restrict the number of faces OR [None] all faces are returned
    :param m_dims:  [int] restrict the number of dimensions (or weights) in the SPoSE model OR
                    [None] all dimensions are returned
    :param pilot: True: use pilot data
    :param return_path: True: return the path to the weights-file
    :param kwargs: additional keyword arguments, forwarded to `load_spose_weights()`
    :return: indices of representative faces [and, if `return_path`, the path to the weights-file]
    """
    # Load the weights (the loader returns a (weights, path) pair when the path is requested)
    loaded = load_spose_weights(session=session, pilot=pilot, return_path=return_path, **kwargs)
    if return_path:
        weight_mat, weights_file = loaded
    else:
        weight_mat, weights_file = loaded, ""

    # Rank faces per dimension from the highest to the lowest weight, then cut to the requested size.
    # rows: index of most representative faces (descending) | cols: dimensions
    # E.g., ranked_faces[1, 0] = index of the second most representative face for the first dimension
    ranked_faces = np.flip(np.argsort(weight_mat, axis=0), axis=0)
    top_faces = ranked_faces[:n_face, :m_dims]

    return (top_faces, weights_file) if return_path else top_faces

extract_faces_for_vice_dimensions 🗿

extract_faces_for_vice_dimensions(
    session: str,
    n_face: int | None = None,
    m_dims: int | None = None,
    pilot: bool = PILOT,
    pruned: bool = True,
    return_path: bool = False,
    param_path: str | None = "",
) -> ndarray | tuple[ndarray, str]

Extract the first n most representative faces for the first m dimensions of the trained VICE model.

See Muttenthaler et al. (arXiv, 2022), p.19, Section F "Interpretability":

Quote

'Objects were sorted in descending order according to their absolute embedding value.'

Parameters:

Name	Type	Description	Default
`session`	`str`	"2D", OR "3D"	required
`n_face`	`int \| None`	[int] restrict the number of faces OR [None] all faces are returned	`None`
`m_dims`	`int \| None`	[int] restrict the number of dimensions (or weights) of the VICE model OR [None] all dimensions are returned	`None`
`pilot`	`bool`	True: use pilot data	`PILOT`
`pruned`	`bool`	True: return the pruned parameters	`True`
`return_path`	`bool`	True: return path to the parameter file	`False`
`param_path`	`str \| None`	path to weight file, defined by the corresponding VICE params (after /[session]/..)	`''`

Returns:

Type	Description
`ndarray \| tuple[ndarray, str]`	indices of representative faces

Source code in code/facesim3d/modeling/computational_choice_model.py

def extract_faces_for_vice_dimensions(
    session: str,
    n_face: int | None = None,
    m_dims: int | None = None,
    pilot: bool = params.PILOT,
    pruned: bool = True,
    return_path: bool = False,
    param_path: str | None = "",
) -> np.ndarray | tuple[np.ndarray, str]:
    """
    Extract the first `n` most representative faces for the first `m` dimensions of the trained VICE model.

    Faces are ranked per dimension by their (signed) location parameter, in descending order.
    See Muttenthaler et al. (arXiv, 2022), p.19, Section F "Interpretability":

    !!! quote
        'Objects were sorted in descending order according to their absolute embedding value.'

    :param session: "2D", OR "3D"
    :param n_face: [int] restrict the number of faces OR [None] all faces are returned
    :param m_dims:  [int] restrict the number of dimensions (or weights) of the VICE model OR
                    [None] all dimensions are returned
    :param pilot: True: use pilot data
    :param pruned: True: return the pruned parameters
    :param return_path: True: return path to the parameter file
    :param param_path: path to weight file, defined by the corresponding VICE params (after /[session]/..)
    :return: indices of representative faces [and, if `return_path`, the path to the parameter file]
    """
    # Load the parameters; the loader appends the file path as a third element when it is requested
    weights_file = ""
    loaded = load_vice_weights(
        session=session, pilot=pilot, pruned=pruned, return_path=return_path, param_path=param_path
    )
    if return_path:
        mu, _scale, weights_file = loaded
    else:
        mu, _scale = loaded

    # Resolve the requested extent; None means "take everything"
    row_stop = n_face if n_face is not None else mu.shape[0]
    col_stop = m_dims if m_dims is not None else mu.shape[1]

    # The paper reports sorting objects by their 'absolute embedding value'.
    # However, facesim3d.modeling.VICE.visualization.plot_topk_objects_per_dimension() sorts by the plain
    # weight value (mu, not sigma), so the same is done here. According to the original author's note,
    # taking the absolute value did not change the results anyway (tested for '2D').
    # But also check out: np.linalg.norm(mu, axis=0), dimensions are here semi-sorted.
    ranked_faces = np.flip(np.argsort(mu, axis=0), axis=0)
    # rows: index of most representative faces (descending) | cols: dimensions
    # E.g., top_faces[1, 0] = index of the second most representative face for the first dimension
    top_faces = ranked_faces[:row_stop, :col_stop]

    return (top_faces, weights_file) if return_path else top_faces

plot_weight_matrix 🗿

plot_weight_matrix(
    weights: ndarray,
    norm: bool,
    fig_name: str,
    save: bool = False,
    save_path: str | Path | None = "",
)

Plot the weight (i.e., m-dimensional embedding) matrix of VICE | SPoSE.

Source code in code/facesim3d/modeling/computational_choice_model.py

def plot_weight_matrix(
    weights: np.ndarray, norm: bool, fig_name: str, save: bool = False, save_path: str | Path | None = ""
) -> None:
    """
    Plot the weight (i.e., `m`-dimensional embedding) matrix of `VICE` | `SPoSE`.

    :param weights: weight matrix to plot (it is not modified)
    :param norm: True: scale the weights by their maximal absolute value before plotting
    :param fig_name: name of the figure (also used as file name when saving)
    :param save: True: save the figure as `.png` and `.svg` and close it; False: show the figure
    :param save_path: directory to save the figure in; an empty string or None is treated as a relative
                      path, i.e., the file name alone
    :return: None
    """
    if norm:
        max_abs = np.abs(weights).max()
        if max_abs != 0:  # an all-zero matrix cannot be scaled (avoids 0/0)
            # Out-of-place division: keeps the caller's array untouched and works for integer arrays, too
            weights = weights / max_abs

    plt.matshow(weights, cmap="seismic", fignum=fig_name)  # could reduce to M_DIMENSIONS
    plt.colorbar()
    plt.tight_layout()

    if save:
        target_dir = "" if save_path is None else save_path
        for ext in ["png", "svg"]:
            plt.savefig(Path(target_dir, f"{fig_name}.{ext}"), dpi=300, format=ext)
        plt.close()
    else:
        plt.show()

prepare_data_for_spose_and_vice 🗿

prepare_data_for_spose_and_vice(
    session: str,
    percentage: int | None = None,
    gender: bool | str = False,
    pilot: bool = PILOT,
) -> None

Prepare data for SPoSE & VICE models.

Quote

(...) triplets are expected to be in the format N x 3, where N = number of trials (e.g., 100k) and 3 refers to the triplets, where col_0 = anchor_1, col_1 = anchor_2, col_2 = odd one out. Triplet data must be split into train and test splits, and named train_90.txt and test_10.txt, respectively.

For hyperparameter tuning, prepare only a percentage of the data.

For more information, see the repos of: SPoSE & VICE.

Parameters:

Name	Type	Description	Default
`session`	`str`	'2D', OR '3D'	required
`percentage`	`int \| None`	percentage of data to use (e.g., 10, 20, ...)	`None`
`gender`	`bool \| str`	True: use only triplets of the same gender, respectively. Compute recursively for both genders. OR str: specify the gender 'female' or 'male'.	`False`
`pilot`	`bool`	True: use pilot data	`PILOT`

Source code in code/facesim3d/modeling/computational_choice_model.py

def prepare_data_for_spose_and_vice(
    session: str, percentage: int | None = None, gender: bool | str = False, pilot: bool = params.PILOT
) -> None:
    """
    Prepare data for `SPoSE` & `VICE` models.

    !!! quote
        (...) triplets are expected to be in the format N x 3, where N = number of trials (e.g., 100k) and
        3 refers to the triplets, where col_0 = anchor_1, col_1 = anchor_2, col_2 = odd one out.
        Triplet data must be split into train and test splits, and named `train_90.txt` and `test_10.txt`,
        respectively.

    For hyperparameter tuning, prepare only a percentage of the data.

    The function writes `train_90.txt`, `test_10.txt` (and `.npy` copies of both) into the data directory
    of the given session. If both text files exist already, nothing is recomputed.
    Note that the sub-sampling and the train-test split are drawn without passing a `random_state`.

    For more information, see the repos of:
        [SPoSE](https://github.com/ViCCo-Group/SPoSE) & [VICE](https://github.com/LukasMut/VICE).

    :param session: '2D', OR '3D'
    :param percentage: percentage of data to use; must be one of 10, 20, 30, 40, 50 (or None for all data)
    :param gender: True: use only triplets of the same gender, respectively. Compute recursively for both genders.
                   OR str: specify the gender 'female' or 'male'.
    :param pilot: True: use pilot data
    :raises ValueError: if `gender` is a string other than 'female' | 'male', or `percentage` is not supported
    """
    # Check for gender specification
    if gender is True:
        prepare_data_for_spose_and_vice(session=session, percentage=percentage, gender="female", pilot=pilot)
        prepare_data_for_spose_and_vice(session=session, percentage=percentage, gender="male", pilot=pilot)
        return  # stop here

    if isinstance(gender, str):
        gender = gender.lower()
        if gender not in {"female", "male"}:
            msg = "Gender must be 'female' OR 'male'!"
            raise ValueError(msg)

    # Set paths
    spose_data_dir = Path(paths.data.pilot.v2, "for_SPoSE", session) if pilot else Path(paths.data.main.spose, session)
    if gender:
        spose_data_dir /= gender
    if percentage is not None:
        if percentage not in {10, 20, 30, 40, 50}:
            msg = "'percentage' must be in [10, 20, 30, 40, 50]!"
            raise ValueError(msg)
        spose_data_dir /= f"{percentage}perc"
    spose_data_dir.mkdir(parents=True, exist_ok=True)

    p2_training_set = spose_data_dir / "train_90.txt"
    p2_test_set = spose_data_dir / "test_10.txt"

    # Check if data already exists
    if p2_training_set.is_file() and p2_test_set.is_file():
        cprint(string=f"SPoSE & VICE data for {session} already prepared.", col="g")
        return

    # Load data tables
    if pilot:
        data_table = read_pilot_data(clean_trials=True, verbose=False)
        participant_table = read_pilot_participant_data()[["ppid", "group_exp"]]
        # Use data of one session (2D, 3D) only
        participant_table = participant_table.loc[participant_table.group_exp == session]
        data_table = data_table.loc[data_table.ppid.isin(participant_table.ppid)].reset_index(drop=True)
    else:
        data_table = read_trial_results_of_session(session=session, clean_trials=True, verbose=False)

    # Prepare training tables
    data_table = data_table[["head1", "head2", "head3", "head_odd"]]
    # Remove trials w/o judgment (rows are selected via `index=`, hence no `axis` argument is needed)
    data_table = data_table.drop(index=data_table.loc[data_table.head_odd == 0].index)
    data_table = data_table.dropna()

    # In the case of gender specification, filter data for gender-specific triplets
    # NOTE(review): the cut is derived from params.main.n_faces also if pilot=True -- confirm this is intended
    gender_cut = params.main.n_faces // 2  # == 50
    if gender:
        if gender == "female":
            data_table = data_table[(data_table <= gender_cut).all(axis=1)]
        else:
            data_table = data_table[(data_table > gender_cut).all(axis=1)]

    data_table = data_table.astype(int).reset_index(drop=True)

    # Bring the table in the following format: col_0: anchor_1, col_1: anchor_2, col_2: odd-one-out
    # The odd-one-out appears twice per row (in its head column and in 'head_odd'), hence counting the values
    # in ascending order puts it last.
    for i, row in tqdm(
        iterable=data_table.iterrows(), desc=f"Prepare data for SPoSE & VICE in {session}", total=len(data_table)
    ):
        # Series.value_counts() replaces the deprecated top-level pd.value_counts()
        data_table.iloc[i, 0:3] = row.value_counts(sort=True, ascending=True).index
    data_table = data_table.drop(columns=["head_odd"])
    data_table.columns = ["col_0", "col_1", "col_2"]

    # Replace head number with index
    index_mapper = (
        partial(head_nr_to_pilot_matrix_index, pilot_version=params.PILOT_VERSION)
        if pilot
        else head_nr_to_main_matrix_index
    )
    # pilot v2: female: 0-12, male: 13-25
    # Element-wise mapping per column (replaces the deprecated DataFrame.applymap())
    data_table = data_table.apply(lambda column: column.map(index_mapper))
    # for main: == data_table = data_table - 1

    if gender == "male":
        # When we have male-only triplets, we need to re-index the heads starting from 0 (instead of 50)
        data_table -= gender_cut

    sampled_index = None  # init
    if percentage is not None:
        data_table = data_table.sample(frac=percentage / 100)
        sampled_index = data_table.index  # keep the drawn row indices, they are saved below
        data_table = data_table.reset_index(drop=True)

    # Extract training and test set (9-1-Ratio)
    training_set = data_table.sample(frac=0.9)
    test_set = data_table.drop(index=training_set.index)

    # Save training and test
    training_set.to_csv(p2_training_set, index=False, header=False, sep=" ")
    test_set.to_csv(p2_test_set, index=False, header=False, sep=" ")
    # Note: SPoSe takes .npy files as input, too
    np.save(file=p2_training_set.with_suffix(".npy"), arr=training_set.to_numpy())
    np.save(file=p2_test_set.with_suffix(".npy"), arr=test_set.to_numpy())

    if percentage is not None:
        # Save sampled index for a fraction of data
        np.save(file=spose_data_dir / "sampled_index.npy", arr=sampled_index)