# main_03a_func_approx_high.py
# Same as main_02a_fgreedy_loop_compute.py, but additionally using 2L kernel optimization.
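# For each dataset, a VKOGA model and KEA models (initialized from the intermediate VKOGA models)
# are compared: L-infinity and MSE errors on training and test data as well as training times are
# collected per number of centers and stored in results/dic_results_dataset_<idx>.npy.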
import os
import time
import numpy as np
from matplotlib import pyplot as plt
from sklearn.model_selection import train_test_split
from utilities.kea6 import KEA
from vkoga_2L import kernels, tkernels
from vkoga_2L.vkoga_2L import VKOGA_2L
from utilities.dataset_collection import Dataset
np.random.seed(1)
# Settings
N_flexibility = 0
list_idx_dataset = [4, 5]
list_kernels = [kernels.Matern(k=k_mat, flag_normalize_x=True) for k_mat in range(5)]
list_tkernels = [tkernels.Matern(k=k_mat, flag_normalize_x=True) for k_mat in range(5)]
exchange_type, greedy_type = 'f_exchange', 'f_greedy'
path_to_files = 'results/'
os.makedirs(path_to_files, exist_ok=True)
for idx_dataset in list_idx_dataset:
    if idx_dataset in [4]:
        nCtrs_max = 200
    elif idx_dataset in [5]:
        nCtrs_max = 100

    list_nCtrs = np.geomspace(5, nCtrs_max, 10, dtype=int)

    dic_results = {}
    dic_results[idx_dataset] = {}

    # Select dataset
    data = Dataset()  # required for some of the following cases
    if idx_dataset in [4, 5]:
        list_names = ['example_5d_faster_conv', 'example_6d_kink']
        data = Dataset()
        f_func, dim = data.dic_dataset[list_names[idx_dataset - 4]]

        X_train, X_test = np.random.rand(10000, dim), np.random.rand(10000, dim)
        y_train, y_test = f_func(X_train), f_func(X_test)
    else:
        continue
    # Keep untransformed copies, so that each kernel's optimized matrix A is applied to the
    # original data and not to data that was already transformed in a previous iteration
    X_train0, X_test0 = X_train, X_test

    # Loop over kernels
    for idx_kernel, kernel in enumerate(list_kernels):
        dic_results[idx_dataset][kernel.name] = {}

        # Run 2L optimization - this is only done in order to obtain the matrix A
        model_2L = VKOGA_2L(kernel=[kernel, list_tkernels[idx_kernel]], greedy_type=greedy_type,
                            verbose=False, flag_2L_optimization=True)
        model_2L.fit(X_train0, y_train, maxIter=1)

        # Transform X_train and X_test with the optimized matrix A
        X_train = X_train0 @ model_2L.A
        X_test = X_test0 @ model_2L.A
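
        # Note (assumption about the VKOGA_2L package, not stated in this script): fitting the
        # plain kernel on the transformed data below corresponds to using the two-layered kernel
        # k(A x, A z) on the original data, so both VKOGA and KEA benefit from the learned A.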

        # Run VKOGA
        t_VKOGA_start = time.time()
        model_VKOGA = VKOGA_2L(kernel=kernel, greedy_type=greedy_type, verbose=False)
        model_VKOGA.fit(X_train, y_train, maxIter=nCtrs_max)
        t_VKOGA_stop = time.time()
        print(' ')

        # Append the final maximum squared training residual to the training history
        model_VKOGA.train_hist['f'].append(np.max(np.abs(model_VKOGA.predict(X_train) - y_train)**2))
        for nCtrs in list_nCtrs:
            ## Run KEA initialized - use best intermediate model
            t_KEAi_start = time.time()
            model_KEAi = KEA(X_ctrs=X_train[model_VKOGA.indI_[:nCtrs], :],
                             y_ctrs=y_train[model_VKOGA.indI_[:nCtrs], :],
                             kernel=kernel, exchange_type=exchange_type)
            _ = model_KEAi.fit(X_train, y_train, maxExch=min([nCtrs, 100]),
                               N_flexibility=N_flexibility, flag_debug=False, flag_best_model=True)
            t_KEAi_stop = time.time()
            s_kea = lambda x: model_KEAi.predict(x)

            ## Run VKOGA (it was computed already before) - use intermediate model
            coeff_vkoga = model_VKOGA.Cut_[:nCtrs, :nCtrs].transpose() @ model_VKOGA.c[:nCtrs, :]
            s_vkoga = lambda x: kernel.eval(x, model_VKOGA.ctrs_[:nCtrs]) @ coeff_vkoga
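
            # Background (assumption about the VKOGA internals): Cut_ stores the change of basis
            # between the Newton basis and the standard kernel basis, so slicing its leading
            # nCtrs x nCtrs block recovers the intermediate nCtrs-center model without refitting.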

            # Compute training and test predictions
            y_train_vkoga, y_test_vkoga = s_vkoga(X_train), s_vkoga(X_test)
            y_train_kea, y_test_kea = s_kea(X_train), s_kea(X_test)

            # Compute Linfty and L2 errors over training and test set
            Linfty_train_vkoga, MSE_train_vkoga = np.max(np.abs(y_train_vkoga - y_train)), np.mean((y_train_vkoga - y_train)**2)
            Linfty_test_vkoga, MSE_test_vkoga = np.max(np.abs(y_test_vkoga - y_test)), np.mean((y_test_vkoga - y_test)**2)
            Linfty_train_kea, MSE_train_kea = np.max(np.abs(y_train_kea - y_train)), np.mean((y_train_kea - y_train)**2)
            Linfty_test_kea, MSE_test_kea = np.max(np.abs(y_test_kea - y_test)), np.mean((y_test_kea - y_test)**2)

            dic_results[idx_dataset][kernel.name][nCtrs] = {}
            dic_results[idx_dataset][kernel.name][nCtrs]['VKOGA'] = {'Linfty_train': Linfty_train_vkoga, 'MSE_train': MSE_train_vkoga,
                                                                     'Linfty_test': Linfty_test_vkoga, 'MSE_test': MSE_test_vkoga,
                                                                     't_train': t_VKOGA_stop - t_VKOGA_start}
            dic_results[idx_dataset][kernel.name][nCtrs]['KEAi'] = {'Linfty_train': Linfty_train_kea, 'MSE_train': MSE_train_kea,
                                                                    'Linfty_test': Linfty_test_kea, 'MSE_test': MSE_test_kea,
                                                                    't_train': t_KEAi_stop - t_KEAi_start}

    # store dic_results to disc
    np.save(path_to_files + 'dic_results_dataset_{}.npy'.format(idx_dataset), dic_results)
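
# Example (sketch, not part of the original pipeline) of how a stored result file can be read
# back, e.g. in a separate plotting script; np.save pickles the dictionary, so allow_pickle=True
# and .item() are needed when loading:
#   dic_loaded = np.load(path_to_files + 'dic_results_dataset_{}.npy'.format(idx_dataset),
#                        allow_pickle=True).item()
#   errors_vkoga = dic_loaded[idx_dataset][kernel.name][nCtrs]['VKOGA']  # Linfty/MSE/t_train entries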