Skip to content

computational_choice_model 🗿

Computational models of human choice behavior.

The SPoSE model is inspired by Hebart et al. (Nat. Hum. Behav., 2020), see Fig. 1c. The VICE model is taken from Muttenthaler et al. (arXiv, 2022).

VICE or SPoSE

'VICE rivals or outperforms its predecessor, SPoSE, at predicting human behavior in the odd-one-out triplet task. Furthermore, VICE's object representations are more reproducible and consistent across random initializations.' - Muttenthaler et al. (arXiv, 2022)

Other computational embedding models are in facesim3d.modeling.VGG.

Functions:

Name Description
display_representative_faces

Display representative faces for the first m dimensions of the trained sparse model.

extract_faces_for_spose_dimensions

Extract the first n most representative faces for the first m dimensions of the trained SPoSE model.

extract_faces_for_vice_dimensions

Extract the first n most representative faces for the first m dimensions of the trained VICE model.

plot_weight_matrix

Plot the weight (i.e., m-dimensional embedding) matrix of VICE | SPoSE.

prepare_data_for_spose_and_vice

Prepare data for SPoSE & VICE models.

display_representative_faces 🗿

display_representative_faces(
    face_dim_idx_mat: ndarray,
    pilot: bool = PILOT,
    as_grid: bool = True,
    title: str | None = None,
    dim_indices: list | None = None,
    save_path: str | Path | None = None,
) -> None

Display representative faces for the first m dimensions of the trained sparse model.

Parameters:

Name Type Description Default
face_dim_idx_mat ndarray

n x m matrix holding, per dimension (column), the indices of its representative faces

required
pilot bool

True: use pilot data

PILOT
as_grid bool

plot as image grid

True
title str | None

Image or figure title

None
dim_indices list | None

list of dimension indices to display

None
save_path str | Path | None

if the path is given, save figure

None

Returns:

Type Description
None

None

Source code in code/facesim3d/modeling/computational_choice_model.py
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
def display_representative_faces(
    face_dim_idx_mat: np.ndarray,
    pilot: bool = params.PILOT,
    as_grid: bool = True,
    title: str | None = None,
    dim_indices: list | None = None,
    save_path: str | Path | None = None,
) -> None:
    """
    Display representative faces for the first `m` dimensions of the trained sparse model.

    With `as_grid=True`, all faces are drawn into one figure with one column per dimension and one row per face.
    Otherwise, each face is handed to `display_face()` individually.

    :param face_dim_idx_mat: n x m matrix of face indices; column j lists the representative faces of dimension j
    :param pilot: True: use pilot data
    :param as_grid: plot as image grid
    :param title: Image or figure title (also used as prefix of the saved file name)
    :param dim_indices: list of (zero-based) dimension indices to display in the column titles and file name;
                        defaults to 0, ..., m-1
    :param save_path: if the path is given, save figure (as .png and .svg) in this directory and close it;
                      only used if `as_grid` is True
    :return: None
    """
    # Choose the map for CFD indices
    idx_mapper = (
        partial(pilot_matrix_index_to_head_nr, pilot_version=params.PILOT_VERSION)
        if pilot
        else main_matrix_index_to_head_nr
    )

    fig, axes = None, None  # init
    if as_grid:
        dim_indices = range(face_dim_idx_mat.shape[1]) if dim_indices is None else dim_indices
        fig, axes = plt.subplots(
            *face_dim_idx_mat.shape,
            figsize=(face_dim_idx_mat.shape[1] * 2, face_dim_idx_mat.shape[0] * 2),
            sharex=True,
            sharey=True,
            num=title,
            squeeze=False,  # always get a 2D axes array, also for a single face and/or a single dimension
        )

    for dim_i, face_indices in enumerate(face_dim_idx_mat.T):
        print(dim_i, face_indices)

        for i, face_idx in enumerate(face_indices):
            # Map indices back to CFD index
            face_id = idx_mapper(face_idx)

            # Display faces with the strongest weight(s) for each dimension
            if as_grid:
                try:
                    face_img = plt.imread(fname=face_image_path(head_id=face_id))  # load as np.array
                except FileNotFoundError:
                    # This computes the screenshot of the 3D reconstructed face
                    display_face(head_id=face_id, data_mode="3d-reconstructions", interactive=False, verbose=False)
                    face_img = plt.imread(fname=face_image_path(head_id=face_id))

                # Zooming takes a while; now it is done already at earlier stage in display_face()
                ax = axes[i, dim_i]
                ax.imshow(face_img)
                if i == 0:
                    ax.set_title(f"Dimension {dim_indices[dim_i] + 1}")
                ax.set_xticks([])
                ax.set_xlabel(face_id)
                ax.yaxis.set_visible(False)
                for spine in ax.spines.values():  # remove axes-box around image
                    spine.set_visible(False)
            else:
                # Display face images externally
                # TODO: add somewhere a caption  # noqa: FIX002
                display_face(head_id=face_id)

    if as_grid:
        fig.tight_layout()
        if save_path:
            fn = (
                f"{title}_representative_faces-{face_dim_idx_mat.shape[0]}_"
                f"dims-{str(tuple(np.array(dim_indices) + 1)).replace(' ', '')}"
            )
            cprint(string=f"Saving figure in '{Path(str(save_path), fn)}' ... ", col="b")
            for ext in ["png", "svg"]:
                # Append the extension instead of using Path.with_suffix(), which would cut the file name
                # at the last dot if the title contains one.
                # Save and close via the figure handle, since display_face() may have changed the
                # "current" pyplot figure in the meantime.
                fig.savefig(fname=Path(save_path, f"{fn}.{ext}"), dpi=300, format=ext)
            plt.close(fig)

extract_faces_for_spose_dimensions 🗿

extract_faces_for_spose_dimensions(
    session: str,
    n_face: int | None = None,
    m_dims: int | None = None,
    pilot: bool = PILOT,
    return_path: bool = False,
    **kwargs
) -> ndarray | tuple[ndarray, str]

Extract the first n most representative faces for the first m dimensions of the trained SPoSE model.

See Hebart et al. (2020), caption of Fig. 2:

Quote

'The images reflect the objects with the highest weights along those dimensions.'

Parameters:

Name Type Description Default
session str

"2D", OR "3D"

required
n_face int | None

[int] restrict the number of faces OR [None] all faces are returned

None
m_dims int | None

[int] restrict the number of dimensions (or weights) in the SPoSE model OR [None] all dimensions are returned

None
pilot bool

True: use pilot data

PILOT
return_path bool

True: return the path to the weights-file

False

Returns:

Type Description
ndarray | tuple[ndarray, str]

indices of representative faces

Source code in code/facesim3d/modeling/computational_choice_model.py
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
def extract_faces_for_spose_dimensions(
    session: str,
    n_face: int | None = None,
    m_dims: int | None = None,
    pilot: bool = params.PILOT,
    return_path: bool = False,
    **kwargs,
) -> np.ndarray | tuple[np.ndarray, str]:
    """
    Rank the faces per dimension of the trained SPoSE model and return the indices of the top-ranked ones.

    See Hebart et al. (2020), caption of Fig. 2:

    !!! quote
        'The images reflect the objects with the highest weights along those dimensions.'

    The returned matrix has one column per dimension; within a column, face indices are ordered from the
    highest to the lowest weight. For instance, entry `[1, 0]` is the index of the face with the second-highest
    weight on the first dimension.

    :param session: "2D", OR "3D"
    :param n_face: [int] keep only the first `n_face` rows (faces) OR [None] keep all faces
    :param m_dims: [int] keep only the first `m_dims` columns (dimensions) of the SPoSE weights OR
                   [None] keep all dimensions
    :param pilot: True: use pilot data
    :param return_path: True: additionally return the path to the weights-file
    :param kwargs: passed on unchanged to `load_spose_weights()`
    :return: indices of representative faces [, path to the weights-file if `return_path` is True]
    """
    loaded = load_spose_weights(session=session, pilot=pilot, return_path=return_path, **kwargs)

    # With return_path, the loader hands back a (weights, path) pair
    if return_path:
        spose_weights, weights_file = loaded
    else:
        spose_weights, weights_file = loaded, ""

    # Sort each column ascending, then flip the rows to get a descending ranking
    ranking = np.argsort(spose_weights, axis=0)[::-1]
    top_faces = ranking[:n_face, :m_dims]

    if not return_path:
        return top_faces
    return top_faces, weights_file

extract_faces_for_vice_dimensions 🗿

extract_faces_for_vice_dimensions(
    session: str,
    n_face: int | None = None,
    m_dims: int | None = None,
    pilot: bool = PILOT,
    pruned: bool = True,
    return_path: bool = False,
    param_path: str | None = "",
) -> ndarray | tuple[ndarray, str]

Extract the first n most representative faces for the first m dimensions of the trained VICE model.

See Muttenthaler et al. (arXiv, 2022), p.19, Section F "Interpretability":

Quote

'Objects were sorted in descending order according to their absolute embedding value.'

Parameters:

Name Type Description Default
session str

"2D", OR "3D"

required
n_face int | None

[int] restrict the number of faces OR [None] all faces are returned

None
m_dims int | None

[int] restrict the number of dimensions (or weights) of the VICE model OR [None] all dimensions are returned

None
pilot bool

True: use pilot data

PILOT
pruned bool

True: return the pruned parameters

True
return_path bool

True: return path to the parameter file

False
param_path str | None

path to weight file, defined by the corresponding VICE params (after /[session]/..)

''

Returns:

Type Description
ndarray | tuple[ndarray, str]

indices of representative faces

Source code in code/facesim3d/modeling/computational_choice_model.py
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
def extract_faces_for_vice_dimensions(
    session: str,
    n_face: int | None = None,
    m_dims: int | None = None,
    pilot: bool = params.PILOT,
    pruned: bool = True,
    return_path: bool = False,
    param_path: str | None = "",
) -> np.ndarray | tuple[np.ndarray, str]:
    """
    Rank the faces per dimension of the trained VICE model and return the indices of the top-ranked ones.

    See Muttenthaler et al. (arXiv, 2022), p.19, Section F "Interpretability":

    !!! quote
        'Objects were sorted in descending order according to their absolute embedding value.'

    Note that, in contrast to the quote, faces are ranked here by the signed location parameter
    (see the comment in the function body for the reasoning).

    The returned matrix has one column per dimension; within a column, face indices are ordered from the
    highest to the lowest value. For instance, entry `[1, 0]` is the index of the second-ranked face
    of the first dimension.

    :param session: "2D", OR "3D"
    :param n_face: [int] keep only the first `n_face` rows (faces) OR [None] keep all faces
    :param m_dims: [int] keep only the first `m_dims` columns (dimensions) of the VICE model OR
                   [None] keep all dimensions
    :param pilot: True: use pilot data
    :param pruned: True: use the pruned parameters
    :param return_path: True: additionally return the path to the parameter file
    :param param_path: path to weight file, defined by the corresponding VICE params (after /[session]/..)
    :return: indices of representative faces [, path to the parameter file if `return_path` is True]
    """
    vice_params = load_vice_weights(
        session=session, pilot=pilot, pruned=pruned, return_path=return_path, param_path=param_path
    )

    # The loader returns (location, scale[, path]); the scale parameter is not needed here
    param_file = ""
    if return_path:
        loc_param, _, param_file = vice_params
    else:
        loc_param, _ = vice_params

    # Resolve open limits to the full extent of the parameter matrix
    if n_face is None:
        n_face = loc_param.shape[0]
    if m_dims is None:
        m_dims = loc_param.shape[1]

    # The paper reports sorting objects by their 'absolute embedding value'.
    # However, facesim3d.modeling.VICE.visualization.plot_topk_objects_per_dimension() sorts by the plain
    # weight value (mu, not sigma), so the same is done here.
    # Taking the absolute value did not change the results anyway (tested for '2D').
    # See also np.linalg.norm(loc_param, axis=0): the dimensions are semi-sorted by it.
    descending_rank = np.argsort(loc_param, axis=0)[::-1]
    top_faces = descending_rank[:n_face, :m_dims]

    if not return_path:
        return top_faces
    return top_faces, param_file

plot_weight_matrix 🗿

plot_weight_matrix(
    weights: ndarray,
    norm: bool,
    fig_name: str,
    save: bool = False,
    save_path: str | Path | None = "",
)

Plot the weight (i.e., m-dimensional embedding) matrix of VICE | SPoSE.

Source code in code/facesim3d/modeling/computational_choice_model.py
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
def plot_weight_matrix(
    weights: np.ndarray, norm: bool, fig_name: str, save: bool = False, save_path: str | Path | None = ""
):
    """Plot the weight (i.e., `m`-dimensional embedding) matrix of `VICE` | `SPoSE`."""
    if norm:
        weights /= np.abs(weights).max()

    plt.matshow(weights, cmap="seismic", fignum=fig_name)  # could reduce to M_DIMENSIONS
    plt.colorbar()
    plt.tight_layout()

    if save:
        for ext in ["png", "svg"]:
            plt.savefig(Path(save_path, f"{fig_name}.{ext}"), dpi=300, format=ext)
        plt.close()
    else:
        plt.show()

prepare_data_for_spose_and_vice 🗿

prepare_data_for_spose_and_vice(
    session: str,
    percentage: int | None = None,
    gender: bool | str = False,
    pilot: bool = PILOT,
) -> None

Prepare data for SPoSE & VICE models.

Quote

(...) triplets are expected to be in the format N x 3, where N = number of trials (e.g., 100k) and 3 refers to the triplets, where col_0 = anchor_1, col_1 = anchor_2, col_2 = odd one out. Triplet data must be split into train and test splits, and named train_90.txt and test_10.txt, respectively.

For hyperparameter tuning, prepare only a percentage of the data.

For more information, see the repos of: SPoSE & VICE.

Parameters:

Name Type Description Default
session str

'2D', OR '3D'

required
percentage int | None

percentage of data to use (e.g., 10, 20, ...)

None
gender bool | str

True: use only triplets of the same gender, respectively. Compute recursively for both genders. OR str: specify the gender 'female' or 'male'.

False
pilot bool

True: use pilot data

PILOT
Source code in code/facesim3d/modeling/computational_choice_model.py
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
def prepare_data_for_spose_and_vice(
    session: str,
    percentage: int | None = None,
    gender: bool | str = False,
    pilot: bool = params.PILOT,
    seed: int | None = None,
) -> None:
    """
    Prepare data for `SPoSE` & `VICE` models.

    !!! quote
        (...) triplets are expected to be in the format N x 3, where N = number of trials (e.g., 100k) and
        3 refers to the triplets, where col_0 = anchor_1, col_1 = anchor_2, col_2 = odd one out.
        Triplet data must be split into train and test splits, and named `train_90.txt` and `test_10.txt`,
        respectively.

    For hyperparameter tuning, prepare only a percentage of the data.

    For more information, see the repos of:
        [SPoSE](https://github.com/ViCCo-Group/SPoSE) & [VICE](https://github.com/LukasMut/VICE).

    The training and test sets are written as space-separated `.txt` files and as `.npy` files.
    If both `.txt` files already exist in the target directory, nothing is recomputed.
    When `percentage` is given, the index of the sampled trials is saved as `sampled_index.npy`, too.

    :param session: '2D', OR '3D'
    :param percentage: percentage of data to use; must be one of 10, 20, 30, 40, 50
    :param gender: True: use only triplets of the same gender, respectively. Compute recursively for both genders.
                   OR str: specify the gender 'female' or 'male'.
    :param pilot: True: use pilot data
    :param seed: random state for the sub-sampling and the train-test split; None (default) draws a new random
                 sample on every call
    :raises ValueError: if `gender` is a string other than 'female' or 'male', if `percentage` is not one of
                        the supported values, or if the odd-one-out of a trial is not exactly one of its three heads
    """
    # Check for gender specification
    if gender is True:
        for single_gender in ("female", "male"):
            prepare_data_for_spose_and_vice(
                session=session, percentage=percentage, gender=single_gender, pilot=pilot, seed=seed
            )
        return  # stop here

    if isinstance(gender, str):
        gender = gender.lower()
        if gender not in {"female", "male"}:
            msg = "Gender must be 'female' OR 'male'!"
            raise ValueError(msg)

    # Set paths
    spose_data_dir = Path(paths.data.pilot.v2, "for_SPoSE", session) if pilot else Path(paths.data.main.spose, session)
    if gender:
        spose_data_dir /= gender
    if percentage is not None:
        if percentage not in {10, 20, 30, 40, 50}:
            msg = "'percentage' must be in [10, 20, 30, 40, 50]!"
            raise ValueError(msg)
        spose_data_dir /= f"{percentage}perc"
    spose_data_dir.mkdir(parents=True, exist_ok=True)

    p2_training_set = spose_data_dir / "train_90.txt"
    p2_test_set = spose_data_dir / "test_10.txt"

    # Check if data already exists
    if p2_training_set.is_file() and p2_test_set.is_file():
        cprint(string=f"SPoSE & VICE data for {session} already prepared.", col="g")
        return

    # Load data tables
    if pilot:
        data_table = read_pilot_data(clean_trials=True, verbose=False)
        participant_table = read_pilot_participant_data()[["ppid", "group_exp"]]
        # Use data of one session (2D, 3D) only
        participant_table = participant_table.loc[participant_table.group_exp == session]
        data_table = data_table.loc[data_table.ppid.isin(participant_table.ppid)].reset_index(drop=True)
    else:
        data_table = read_trial_results_of_session(session=session, clean_trials=True, verbose=False)

    # Prepare training tables
    data_table = data_table[["head1", "head2", "head3", "head_odd"]]
    data_table = data_table.loc[data_table.head_odd != 0]  # remove trials w/o judgment
    data_table = data_table.dropna()

    # In the case of gender specification, filter data for gender-specific triplets
    gender_cut = params.main.n_faces // 2  # == 50
    if gender:
        if gender == "female":
            data_table = data_table[(data_table <= gender_cut).all(axis=1)]
        else:
            data_table = data_table[(data_table > gender_cut).all(axis=1)]

    data_table = data_table.astype(int).reset_index(drop=True)

    # Bring the table in the following format: col_0: anchor_1, col_1: anchor_2, col_2: odd-one-out
    # (vectorized: the two heads that differ from the odd-one-out keep their original order)
    heads = data_table[["head1", "head2", "head3"]].to_numpy()
    odd_one_out = data_table["head_odd"].to_numpy()
    is_odd = heads == odd_one_out[:, None]
    if not (is_odd.sum(axis=1) == 1).all():
        msg = "The odd-one-out of each trial must be exactly one of its three heads!"
        raise ValueError(msg)
    anchors = heads[~is_odd].reshape(-1, 2)
    data_table = pd.DataFrame(np.column_stack([anchors, odd_one_out]), columns=["col_0", "col_1", "col_2"])

    # Replace head number with index
    index_mapper = (
        partial(head_nr_to_pilot_matrix_index, pilot_version=params.PILOT_VERSION)
        if pilot
        else head_nr_to_main_matrix_index
    )
    # pilot v2: female: 0-12, male: 13-25
    # Map column-wise via Series.map, since DataFrame.applymap is deprecated in recent pandas versions
    data_table = data_table.apply(lambda column: column.map(index_mapper))
    # for main: == data_table = data_table - 1

    if gender == "male":
        # When we have male-only triplets, we need to re-index the heads starting from 0 (instead of 50)
        # NOTE(review): gender_cut is derived from the number of faces in the main study; with pilot=True the
        #  indices noted above (male: 13-25) would become negative here -- confirm before combining both.
        data_table -= gender_cut

    sampled_index = None  # init
    if percentage is not None:
        data_table = data_table.sample(frac=percentage / 100, random_state=seed)
        sampled_index = data_table.index
        data_table = data_table.reset_index(drop=True)

    # Extract training and test set (9-1-Ratio)
    training_set = data_table.sample(frac=0.9, random_state=seed)
    test_set = data_table.drop(index=training_set.index)

    # Save training and test
    training_set.to_csv(p2_training_set, index=False, header=False, sep=" ")
    test_set.to_csv(p2_test_set, index=False, header=False, sep=" ")
    # Note: SPoSe takes .npy files as input, too
    np.save(file=p2_training_set.with_suffix(".npy"), arr=training_set.to_numpy())
    np.save(file=p2_test_set.with_suffix(".npy"), arr=test_set.to_numpy())

    if percentage is not None:
        # Save sampled index for a fraction of data
        np.save(file=spose_data_dir / "sampled_index.npy", arr=sampled_index)