Skip to content
Snippets Groups Projects
Select Git revision
  • 01130da2452713c5f0619e6a2de86dc3062d46b0
  • main default protected
2 results

main_03a_func_approx_high.py

Blame
  • Code owners
    Assign users and groups as approvers for specific file changes. Learn more.
    main_03a_func_approx_high.py 4.88 KiB
    # Same as main_02a_fgreedy_loop_compute.py, but additionally using 2L kernel optimization.
    
    
    import os
    import time
    import numpy as np
    
    from matplotlib import pyplot as plt
    from sklearn.model_selection import train_test_split
    
    from utilities.kea6 import KEA
    from vkoga_2L import kernels, tkernels
    from vkoga_2L.vkoga_2L import VKOGA_2L
    from utilities.dataset_collection import Dataset
    
    
    
    # Seed NumPy's global RNG so that the randomly sampled training and test
    # points drawn in the main loop below are reproducible between runs.
    np.random.seed(1)

    # Settings
    N_flexibility = 0                   # forwarded unchanged to KEA.fit(...)

    # Dataset indices processed by the main loop. This is the single place to
    # change them (a second, identical assignment further down used to
    # silently override this one).
    list_idx_dataset = [4, 5]

    # Matern kernels for k = 0, ..., 4. The two lists are paired by position:
    # the main loop passes [list_kernels[i], list_tkernels[i]] to VKOGA_2L.
    list_kernels = [kernels.Matern(k=k_mat, flag_normalize_x=True) for k_mat in range(5)]
    list_tkernels = [tkernels.Matern(k=k_mat, flag_normalize_x=True) for k_mat in range(5)]

    # Selection criteria handed to KEA (exchange_type) and VKOGA_2L (greedy_type).
    exchange_type, greedy_type = 'f_exchange', 'f_greedy'

    # Output directory for the per-dataset result files; created if missing.
    path_to_files = 'results/'
    os.makedirs(path_to_files, exist_ok=True)
    
    
    def _error_metrics(y_pred, y_true):
        """Return ``(Linfty, MSE)`` of the prediction ``y_pred`` against ``y_true``."""
        residual = y_pred - y_true
        return np.max(np.abs(residual)), np.mean(residual ** 2)


    # Maximal number of centers for every supported dataset index.
    dic_nCtrs_max = {4: 200, 5: 100}

    # Keys into Dataset().dic_dataset; dataset index 4 maps to entry 0, index 5 to entry 1.
    list_names = ['example_5d_faster_conv', 'example_6d_kink']


    for idx_dataset in list_idx_dataset:

        # Skip unsupported datasets before anything dataset-specific is used.
        # (Previously nCtrs_max was read before this check, which raised a
        # NameError or silently reused the value of the preceding dataset.)
        if idx_dataset not in dic_nCtrs_max:
            continue

        nCtrs_max = dic_nCtrs_max[idx_dataset]

        # Ten geometrically spaced center counts between 5 and nCtrs_max.
        list_nCtrs = np.geomspace(5, nCtrs_max, 10, dtype=int)

        # Results are collected and stored per dataset.
        dic_results = {idx_dataset: {}}

        # Select dataset: target function and input dimension
        data = Dataset()
        f_func, dim = data.dic_dataset[list_names[idx_dataset - 4]]

        # Uniformly random training and test points in [0, 1)^dim
        X_train, X_test = np.random.rand(10000, dim), np.random.rand(10000, dim)
        y_train, y_test = f_func(X_train), f_func(X_test)


        # Loop over kernels
        for idx_kernel, kernel in enumerate(list_kernels):

            dic_results[idx_dataset][kernel.name] = {}

            # Run 2L optimization - this is only done in order to obtain the matrix A
            model_2L = VKOGA_2L(kernel=[kernel, list_tkernels[idx_kernel]], greedy_type=greedy_type, verbose=False, flag_2L_optimization=True)
            model_2L.fit(X_train, y_train, maxIter=1)

            # Transform the inputs with the matrix A of *this* kernel only.
            # The results go into separate variables: overwriting X_train / X_test
            # here would feed already-transformed data to the next kernel, so the
            # transformations of all previous kernels would accumulate.
            X_train_2L = X_train @ model_2L.A
            X_test_2L = X_test @ model_2L.A

            # Run VKOGA once with the maximal number of centers; the smaller
            # intermediate models are extracted from it below.
            t_VKOGA_start = time.time()
            model_VKOGA = VKOGA_2L(kernel=kernel, greedy_type=greedy_type, verbose=False)
            model_VKOGA.fit(X_train_2L, y_train, maxIter=nCtrs_max)
            t_VKOGA_stop = time.time()
            print(' ')
            # Append the squared maximal training residual of the final model to the history.
            model_VKOGA.train_hist['f'].append(np.max(np.abs(model_VKOGA.predict(X_train_2L) - y_train)**2))


            for nCtrs in list_nCtrs:

                # Indices (into the training set) of the first nCtrs points selected by VKOGA
                idx_ctrs = model_VKOGA.indI_[:nCtrs]

                ## Run KEA initialized - use best intermediate model
                t_KEAi_start = time.time()
                model_KEAi = KEA(X_ctrs=X_train_2L[idx_ctrs, :],
                                 y_ctrs=y_train[idx_ctrs, :],
                                 kernel=kernel, exchange_type=exchange_type)
                _ = model_KEAi.fit(X_train_2L, y_train, maxExch=min(nCtrs, 100),
                                   N_flexibility=N_flexibility, flag_debug=False, flag_best_model=True)
                t_KEAi_stop = time.time()


                ## Run VKOGA (it was computed already before) - use intermediate model
                coeff_vkoga = model_VKOGA.Cut_[:nCtrs, :nCtrs].transpose() @ model_VKOGA.c[:nCtrs, :]
                ctrs_vkoga = model_VKOGA.ctrs_[:nCtrs]

                # Compute training and test predictions
                y_train_vkoga = kernel.eval(X_train_2L, ctrs_vkoga) @ coeff_vkoga
                y_test_vkoga = kernel.eval(X_test_2L, ctrs_vkoga) @ coeff_vkoga
                y_train_kea = model_KEAi.predict(X_train_2L)
                y_test_kea = model_KEAi.predict(X_test_2L)

                # Compute Linfty and L2 errors over training and test set
                Linfty_train_vkoga, MSE_train_vkoga = _error_metrics(y_train_vkoga, y_train)
                Linfty_test_vkoga, MSE_test_vkoga = _error_metrics(y_test_vkoga, y_test)

                Linfty_train_kea, MSE_train_kea = _error_metrics(y_train_kea, y_train)
                Linfty_test_kea, MSE_test_kea = _error_metrics(y_test_kea, y_test)


                # NOTE: 't_train' of VKOGA is the time of the single full run with
                # nCtrs_max centers and therefore identical for every nCtrs.
                dic_results[idx_dataset][kernel.name][nCtrs] = {
                    'VKOGA': {'Linfty_train': Linfty_train_vkoga, 'MSE_train': MSE_train_vkoga,
                              'Linfty_test': Linfty_test_vkoga, 'MSE_test': MSE_test_vkoga,
                              't_train': t_VKOGA_stop - t_VKOGA_start},
                    'KEAi': {'Linfty_train': Linfty_train_kea, 'MSE_train': MSE_train_kea,
                             'Linfty_test': Linfty_test_kea, 'MSE_test': MSE_test_kea,
                             't_train': t_KEAi_stop - t_KEAi_start},
                }


        # store dic_results to disc
        # (a dict is saved as a pickled object array; load with np.load(..., allow_pickle=True).item())
        np.save(path_to_files + 'dic_results_dataset_{}.npy'.format(idx_dataset), dic_results)