# main_03a_func_approx_high.py
# Same as main_02a_fgreedy_loop_compute.py, but additionally using 2L kernel optimization.
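# For each dataset, a VKOGA model and KEA models (initialized from the intermediate VKOGA models)
# are compared: L-infinity and MSE errors on training and test data as well as training times are
# collected per number of centers and stored in results/dic_results_dataset_<idx>.npy.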
import os
import time
import numpy as np
from matplotlib import pyplot as plt
from sklearn.model_selection import train_test_split
from utilities.kea6 import KEA
from vkoga_2L import kernels, tkernels
from vkoga_2L.vkoga_2L import VKOGA_2L
from utilities.dataset_collection import Dataset
np.random.seed(1)
# Settings
N_flexibility = 0
list_idx_dataset = [4, 5]
list_kernels = [kernels.Matern(k=k_mat, flag_normalize_x=True) for k_mat in range(5)]
list_tkernels = [tkernels.Matern(k=k_mat, flag_normalize_x=True) for k_mat in range(5)]
exchange_type, greedy_type = 'f_exchange', 'f_greedy'
path_to_files = 'results/'
os.makedirs(path_to_files, exist_ok=True)
for idx_dataset in list_idx_dataset:
    if idx_dataset in [4]:
        nCtrs_max = 200
    elif idx_dataset in [5]:
        nCtrs_max = 100

    list_nCtrs = np.geomspace(5, nCtrs_max, 10, dtype=int)

    dic_results = {}
    dic_results[idx_dataset] = {}

    # Select dataset
    data = Dataset()  # required for some of the following cases
    if idx_dataset in [4, 5]:
        list_names = ['example_5d_faster_conv', 'example_6d_kink']
        data = Dataset()
        f_func, dim = data.dic_dataset[list_names[idx_dataset - 4]]

        X_train, X_test = np.random.rand(10000, dim), np.random.rand(10000, dim)
        y_train, y_test = f_func(X_train), f_func(X_test)
    else:
        continue
    # Keep untransformed copies, so that each kernel's optimized matrix A is applied to the
    # original data and not to data that was already transformed in a previous iteration
    X_train0, X_test0 = X_train, X_test

    # Loop over kernels
    for idx_kernel, kernel in enumerate(list_kernels):
        dic_results[idx_dataset][kernel.name] = {}

        # Run 2L optimization - this is only done in order to obtain the matrix A
        model_2L = VKOGA_2L(kernel=[kernel, list_tkernels[idx_kernel]], greedy_type=greedy_type,
                            verbose=False, flag_2L_optimization=True)
        model_2L.fit(X_train0, y_train, maxIter=1)

        # Transform X_train and X_test with the optimized matrix A
        X_train = X_train0 @ model_2L.A
        X_test = X_test0 @ model_2L.A
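
        # Note (assumption about the VKOGA_2L package, not stated in this script): fitting the
        # plain kernel on the transformed data below corresponds to using the two-layered kernel
        # k(A x, A z) on the original data, so both VKOGA and KEA benefit from the learned A.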

        # Run VKOGA
        t_VKOGA_start = time.time()
        model_VKOGA = VKOGA_2L(kernel=kernel, greedy_type=greedy_type, verbose=False)
        model_VKOGA.fit(X_train, y_train, maxIter=nCtrs_max)
        t_VKOGA_stop = time.time()
        print(' ')

        # Append the final maximum squared training residual to the training history
        model_VKOGA.train_hist['f'].append(np.max(np.abs(model_VKOGA.predict(X_train) - y_train)**2))
        for nCtrs in list_nCtrs:
            ## Run KEA initialized - use best intermediate model
            t_KEAi_start = time.time()
            model_KEAi = KEA(X_ctrs=X_train[model_VKOGA.indI_[:nCtrs], :],
                             y_ctrs=y_train[model_VKOGA.indI_[:nCtrs], :],
                             kernel=kernel, exchange_type=exchange_type)
            _ = model_KEAi.fit(X_train, y_train, maxExch=min([nCtrs, 100]),
                               N_flexibility=N_flexibility, flag_debug=False, flag_best_model=True)
            t_KEAi_stop = time.time()
            s_kea = lambda x: model_KEAi.predict(x)

            ## Run VKOGA (it was computed already before) - use intermediate model
            coeff_vkoga = model_VKOGA.Cut_[:nCtrs, :nCtrs].transpose() @ model_VKOGA.c[:nCtrs, :]
            s_vkoga = lambda x: kernel.eval(x, model_VKOGA.ctrs_[:nCtrs]) @ coeff_vkoga
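
            # Background (assumption about the VKOGA internals): Cut_ stores the change of basis
            # between the Newton basis and the standard kernel basis, so slicing its leading
            # nCtrs x nCtrs block recovers the intermediate nCtrs-center model without refitting.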

            # Compute training and test predictions
            y_train_vkoga, y_test_vkoga = s_vkoga(X_train), s_vkoga(X_test)
            y_train_kea, y_test_kea = s_kea(X_train), s_kea(X_test)

            # Compute Linfty and L2 errors over training and test set
            Linfty_train_vkoga, MSE_train_vkoga = np.max(np.abs(y_train_vkoga - y_train)), np.mean((y_train_vkoga - y_train)**2)
            Linfty_test_vkoga, MSE_test_vkoga = np.max(np.abs(y_test_vkoga - y_test)), np.mean((y_test_vkoga - y_test)**2)
            Linfty_train_kea, MSE_train_kea = np.max(np.abs(y_train_kea - y_train)), np.mean((y_train_kea - y_train)**2)
            Linfty_test_kea, MSE_test_kea = np.max(np.abs(y_test_kea - y_test)), np.mean((y_test_kea - y_test)**2)

            dic_results[idx_dataset][kernel.name][nCtrs] = {}
            dic_results[idx_dataset][kernel.name][nCtrs]['VKOGA'] = {'Linfty_train': Linfty_train_vkoga, 'MSE_train': MSE_train_vkoga,
                                                                     'Linfty_test': Linfty_test_vkoga, 'MSE_test': MSE_test_vkoga,
                                                                     't_train': t_VKOGA_stop - t_VKOGA_start}
            dic_results[idx_dataset][kernel.name][nCtrs]['KEAi'] = {'Linfty_train': Linfty_train_kea, 'MSE_train': MSE_train_kea,
                                                                    'Linfty_test': Linfty_test_kea, 'MSE_test': MSE_test_kea,
                                                                    't_train': t_KEAi_stop - t_KEAi_start}

    # store dic_results to disc
    np.save(path_to_files + 'dic_results_dataset_{}.npy'.format(idx_dataset), dic_results)
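
# Example (sketch, not part of the original pipeline) of how a stored result file can be read
# back, e.g. in a separate plotting script; np.save pickles the dictionary, so allow_pickle=True
# and .item() are needed when loading:
#   dic_loaded = np.load(path_to_files + 'dic_results_dataset_{}.npy'.format(idx_dataset),
#                        allow_pickle=True).item()
#   errors_vkoga = dic_loaded[idx_dataset][kernel.name][nCtrs]['VKOGA']  # Linfty/MSE/t_train entries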