Commit a3ff177b authored by Wenzel, Tizian

Initial commit of all files.

parent ade76c90
# Comparison of VKOGA and ERBA on a simple 2D function
# THIS IS THE CODE USED FOR THE FIRST NUMERICAL EXPERIMENTS WITHIN THE KERNEL EXCHANGE PAPER.
import time
import numpy as np
from scipy.stats import qmc
from matplotlib import pyplot as plt
from vkoga_2L.kernels import Matern
from vkoga_2L.vkoga_2L import VKOGA_2L
from utilities.erba import ERBA
nr_setting = 1
## Some settings
if nr_setting == 0:
    dim = 2
    f_func = lambda x: x[:, [0]]**2 + x[:, [1]]**2
    kernel = Matern(k=2, flag_normalize_x=True)
    Npoints = 200
elif nr_setting == 1:
    dim = 3
    f_func = lambda x: np.abs(x[:, [0]] - .5) + np.sin(x[:, [1]] + x[:, [2]])
    kernel = Matern(k=1, flag_normalize_x=True)
    Npoints = 200
greedy_type = 'f_greedy'
## Some more settings
sampler = qmc.Sobol(d=dim, scramble=False) # use low discrepancy points for increased stability
sample = 2 * sampler.random_base2(m=int(np.ceil(np.log(Npoints) / np.log(2)))) - 1  # rescale the Sobol points from [0, 1)^dim to [-1, 1)^dim
n_pts_max = sample.shape[0]
maxIter = n_pts_max
## Create data
X_train, X_test = sample, np.random.rand(n_pts_max, dim)  # note: the test points lie in [0, 1]^dim, a subset of the training domain [-1, 1]^dim
y_train, y_test = f_func(X_train), f_func(X_test)
## Run VKOGA
t0 = time.time()
model_VKOGA = VKOGA_2L(kernel=kernel, greedy_type=greedy_type, verbose=True)
model_VKOGA.fit(X_train, y_train, X_val=X_test, y_val=y_test, maxIter=maxIter)
# Compute final error and append to list
model_VKOGA.train_hist['f'].append(np.max(np.abs(model_VKOGA.predict(X_train) - y_train)**2))
model_VKOGA.train_hist['f val'].append(np.max(np.abs(model_VKOGA.predict(X_test) - y_test)**2))
t1 = time.time()
## Run ERBA
t2 = time.time()
model_ERBA = ERBA(kernel=kernel, greedy_type=greedy_type, verbose=True)
model_ERBA.fit(X_train, y_train, X_val=X_test, y_val=y_test, maxIter=maxIter)
t3 = time.time()
## Visualize some results
list_idx_vkoga = list(np.arange(len(model_VKOGA.train_hist['f'])))
list_idx_erba = list(np.arange(1+len(model_ERBA.train_hist['f'])))
plt.figure(10 + nr_setting)
plt.clf()
plt.plot(list_idx_vkoga[1:], np.sqrt(model_VKOGA.train_hist['f'][1:]), label='VKOGA')
plt.plot(list_idx_erba[1:], np.sqrt(model_ERBA.train_hist['f'][::-1]), label='ERBA')  # ERBA removes points, so reverse its history to plot over increasing numbers of centers
plt.legend()
plt.xscale('log')
plt.yscale('log')
plt.show(block=False)
# Based on main_01_smooth.py, I want to loop over:
# - dataset
# - kernel
# - number of centers
import os
import time
import numpy as np
from matplotlib import pyplot as plt
from sklearn.model_selection import train_test_split
from utilities.dataset_collection import Dataset
from vkoga_2L.kernels import Matern, Wendland # use 2L for scaling
from vkoga_2L.vkoga_2L import VKOGA_2L
from utilities.kea6 import KEA
np.random.seed(1)
# Settings
N_flexibility = 0 # this is not used in the experiments, thus set to zero
list_idx_dataset = [0, 1, 2, 3]
list_kernels = [Matern(k=k_mat, flag_normalize_x=True) for k_mat in range(5)]
exchange_type, greedy_type = 'f_exchange', 'f_greedy'
path_to_files = 'results/'
os.makedirs(path_to_files, exist_ok=True)
for idx_dataset in list_idx_dataset:
    # Pick correct number of centers
    if idx_dataset in [0]:
        nCtrs_max = 150
    elif idx_dataset in [1, 2, 3]:
        nCtrs_max = 80
    elif idx_dataset in [4]:
        nCtrs_max = 200
    elif idx_dataset in [5]:
        nCtrs_max = 100
    list_nCtrs = np.geomspace(5, nCtrs_max, 10, dtype=int)

    dic_results = {}
    dic_results[idx_dataset] = {}

    # Select dataset
    data = Dataset()
    if idx_dataset == 0:
        dim = 2
        f_func = data.dic_dataset['example_2d_franke'][0]
        X_train, X_test = np.random.rand(1000, dim), np.random.rand(1000, dim)
        y_train, y_test = f_func(X_train), f_func(X_test)
    elif idx_dataset in [1, 2, 3]:
        dim = 2
        f_func = data.dic_dataset['example_F' + str(idx_dataset + 1)][0]
        X_train, X_test = np.random.rand(1000, dim), np.random.rand(1000, dim)
        y_train, y_test = f_func(X_train), f_func(X_test)
    else:
        continue

    # Loop over kernels
    for kernel in list_kernels:
        dic_results[idx_dataset][kernel.name] = {}

        # Run VKOGA
        t_VKOGA_start = time.time()
        model_VKOGA = VKOGA_2L(kernel=kernel, greedy_type=greedy_type, verbose=False)
        model_VKOGA.fit(X_train, y_train, maxIter=nCtrs_max)
        t_VKOGA_stop = time.time()
        print(' ')
        model_VKOGA.train_hist['f'].append(np.max(np.abs(model_VKOGA.predict(X_train) - y_train)**2))

        for nCtrs in list_nCtrs:
            ## Run KEA initialized - use best intermediate model
            t_KEAi_start = time.time()
            model_KEAi = KEA(X_ctrs=X_train[model_VKOGA.indI_[:nCtrs], :],
                             y_ctrs=y_train[model_VKOGA.indI_[:nCtrs], :],
                             kernel=kernel, exchange_type=exchange_type)
            _ = model_KEAi.fit(X_train, y_train, maxExch=min([nCtrs, 100]),
                               N_flexibility=N_flexibility, flag_debug=False, flag_best_model=True)
            t_KEAi_stop = time.time()
            s_kea = lambda x: model_KEAi.predict(x)

            ## Run VKOGA (it was computed already before) - use intermediate model
            coeff_vkoga = model_VKOGA.Cut_[:nCtrs, :nCtrs].transpose() @ model_VKOGA.c[:nCtrs, :]
            s_vkoga = lambda x: kernel.eval(x, model_VKOGA.ctrs_[:nCtrs]) @ coeff_vkoga

            # Compute training and test predictions
            y_train_vkoga, y_test_vkoga = s_vkoga(X_train), s_vkoga(X_test)
            y_train_kea, y_test_kea = s_kea(X_train), s_kea(X_test)

            # Compute Linfty and L2 errors over training and test set
            Linfty_train_vkoga, MSE_train_vkoga = np.max(np.abs(y_train_vkoga - y_train)), np.mean((y_train_vkoga - y_train)**2)
            Linfty_test_vkoga, MSE_test_vkoga = np.max(np.abs(y_test_vkoga - y_test)), np.mean((y_test_vkoga - y_test)**2)
            Linfty_train_kea, MSE_train_kea = np.max(np.abs(y_train_kea - y_train)), np.mean((y_train_kea - y_train)**2)
            Linfty_test_kea, MSE_test_kea = np.max(np.abs(y_test_kea - y_test)), np.mean((y_test_kea - y_test)**2)

            dic_results[idx_dataset][kernel.name][nCtrs] = {}
            dic_results[idx_dataset][kernel.name][nCtrs]['VKOGA'] = {'Linfty_train': Linfty_train_vkoga, 'MSE_train': MSE_train_vkoga,
                                                                     'Linfty_test': Linfty_test_vkoga, 'MSE_test': MSE_test_vkoga,
                                                                     't_train': t_VKOGA_stop - t_VKOGA_start}
            dic_results[idx_dataset][kernel.name][nCtrs]['KEAi'] = {'Linfty_train': Linfty_train_kea, 'MSE_train': MSE_train_kea,
                                                                    'Linfty_test': Linfty_test_kea, 'MSE_test': MSE_test_kea,
                                                                    't_train': t_KEAi_stop - t_KEAi_start}

    # Store dic_results to disk
    np.save(path_to_files + 'dic_results_dataset_{}.npy'.format(idx_dataset), dic_results)
import os
import numpy as np
import matplotlib.pyplot as plt
path_to_files = 'results/'
list_files = os.listdir(path_to_files)
R = np.linspace(0, 1, int(1.5 * 5))
array_color = plt.cm.hsv(R)
for idx_file, file in enumerate(list_files):
    dic_results = np.load(path_to_files + file, allow_pickle=True).item()
    nr_dataset = int(file.split('_')[-1].split('.')[0])
    if nr_dataset not in [0, 1, 2, 3, 4, 5]:
        continue
    assert nr_dataset == list(dic_results.keys())[0], 'The dataset number should be the same as the key in the dictionary.'
    assert len(dic_results.keys()) == 1, 'There should be only one dataset in the file.'

    list_kernels = list(dic_results[nr_dataset].keys())
    list_nCtrs = list(dic_results[nr_dataset][list_kernels[0]].keys())
    str_error = 'Linfty_test'

    # Visualization of convergence
    plt.figure(100 + idx_file + 1)
    plt.clf()
    for idx_kernel, str_kernel in enumerate(list_kernels):
        if int(str_kernel[-1]) % 2 == 1:  # only plot every second kernel (even smoothness index)
            continue
        plt.plot(list_nCtrs, [dic_results[nr_dataset][str_kernel][nCtrs]['VKOGA'][str_error] for nCtrs in list_nCtrs],
                 'x-', color=array_color[idx_kernel, :])
    for idx_kernel, str_kernel in enumerate(list_kernels):
        if int(str_kernel[-1]) % 2 == 1:
            continue
        plt.plot(list_nCtrs, [dic_results[nr_dataset][str_kernel][nCtrs]['KEAi'][str_error] for nCtrs in list_nCtrs],
                 'o--', color=array_color[idx_kernel, :])
    plt.xscale('log')
    plt.yscale('log')
    plt.legend([str_kernel for str_kernel in list_kernels if int(str_kernel[-1]) % 2 == 0])
    plt.title('Dataset ' + str(nr_dataset))
    plt.show(block=False)
# Same as main_02a_fgreedy_loop_compute.py, however additionally using 2L kernel optimization.
import os
import time
import numpy as np
from matplotlib import pyplot as plt
from sklearn.model_selection import train_test_split
from utilities.kea6 import KEA
from vkoga_2L import kernels, tkernels
from vkoga_2L.vkoga_2L import VKOGA_2L
from utilities.dataset_collection import Dataset
np.random.seed(1)
# Settings
N_flexibility = 0
list_idx_dataset = [0, 1, 2, 3, 4, 5, 7, 8, 9]
list_kernels = [kernels.Matern(k=k_mat, flag_normalize_x=True) for k_mat in range(5)]
list_tkernels = [tkernels.Matern(k=k_mat, flag_normalize_x=True) for k_mat in range(5)]
exchange_type, greedy_type = 'f_exchange', 'f_greedy'
path_to_files = 'results/'
os.makedirs(path_to_files, exist_ok=True)
list_idx_dataset = [4, 5]  # restrict to the 5d and 6d examples for this script
for idx_dataset in list_idx_dataset:
    if idx_dataset in [4]:
        nCtrs_max = 200
    elif idx_dataset in [5]:
        nCtrs_max = 100
    list_nCtrs = np.geomspace(5, nCtrs_max, 10, dtype=int)

    dic_results = {}
    dic_results[idx_dataset] = {}

    # Select dataset
    data = Dataset()  # required for some of the following cases
    if idx_dataset in [4, 5]:
        list_names = ['example_5d_faster_conv', 'example_6d_kink']
        f_func, dim = data.dic_dataset[list_names[idx_dataset - 4]]
        # Draw one sample set and split it into training and test data
        X = np.random.rand(10000, dim)
        y = f_func(X)
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=2)
    else:
        continue

    # Keep the untransformed inputs, so that every kernel starts from the same data
    X_train0, X_test0 = X_train, X_test

    # Loop over kernels
    for idx_kernel, kernel in enumerate(list_kernels):
        dic_results[idx_dataset][kernel.name] = {}

        # Run 2L optimization - this is only done in order to obtain the matrix A
        model_2L = VKOGA_2L(kernel=[kernel, list_tkernels[idx_kernel]], greedy_type=greedy_type, verbose=False, flag_2L_optimization=True)
        model_2L.fit(X_train0, y_train, maxIter=1)

        # Transform X_train and X_test
        X_train = X_train0 @ model_2L.A
        X_test = X_test0 @ model_2L.A

        # Run VKOGA
        t_VKOGA_start = time.time()
        model_VKOGA = VKOGA_2L(kernel=kernel, greedy_type=greedy_type, verbose=False)
        model_VKOGA.fit(X_train, y_train, maxIter=nCtrs_max)
        t_VKOGA_stop = time.time()
        print(' ')
        model_VKOGA.train_hist['f'].append(np.max(np.abs(model_VKOGA.predict(X_train) - y_train)**2))

        for nCtrs in list_nCtrs:
            ## Run KEA initialized - use best intermediate model
            t_KEAi_start = time.time()
            model_KEAi = KEA(X_ctrs=X_train[model_VKOGA.indI_[:nCtrs], :],
                             y_ctrs=y_train[model_VKOGA.indI_[:nCtrs], :],
                             kernel=kernel, exchange_type=exchange_type)
            _ = model_KEAi.fit(X_train, y_train, maxExch=min([nCtrs, 100]),
                               N_flexibility=N_flexibility, flag_debug=False, flag_best_model=True)
            t_KEAi_stop = time.time()
            s_kea = lambda x: model_KEAi.predict(x)

            ## Run VKOGA (it was computed already before) - use intermediate model
            coeff_vkoga = model_VKOGA.Cut_[:nCtrs, :nCtrs].transpose() @ model_VKOGA.c[:nCtrs, :]
            s_vkoga = lambda x: kernel.eval(x, model_VKOGA.ctrs_[:nCtrs]) @ coeff_vkoga

            # Compute training and test predictions
            y_train_vkoga, y_test_vkoga = s_vkoga(X_train), s_vkoga(X_test)
            y_train_kea, y_test_kea = s_kea(X_train), s_kea(X_test)

            # Compute Linfty and L2 errors over training and test set
            Linfty_train_vkoga, MSE_train_vkoga = np.max(np.abs(y_train_vkoga - y_train)), np.mean((y_train_vkoga - y_train)**2)
            Linfty_test_vkoga, MSE_test_vkoga = np.max(np.abs(y_test_vkoga - y_test)), np.mean((y_test_vkoga - y_test)**2)
            Linfty_train_kea, MSE_train_kea = np.max(np.abs(y_train_kea - y_train)), np.mean((y_train_kea - y_train)**2)
            Linfty_test_kea, MSE_test_kea = np.max(np.abs(y_test_kea - y_test)), np.mean((y_test_kea - y_test)**2)

            dic_results[idx_dataset][kernel.name][nCtrs] = {}
            dic_results[idx_dataset][kernel.name][nCtrs]['VKOGA'] = {'Linfty_train': Linfty_train_vkoga, 'MSE_train': MSE_train_vkoga,
                                                                     'Linfty_test': Linfty_test_vkoga, 'MSE_test': MSE_test_vkoga,
                                                                     't_train': t_VKOGA_stop - t_VKOGA_start}
            dic_results[idx_dataset][kernel.name][nCtrs]['KEAi'] = {'Linfty_train': Linfty_train_kea, 'MSE_train': MSE_train_kea,
                                                                    'Linfty_test': Linfty_test_kea, 'MSE_test': MSE_test_kea,
                                                                    't_train': t_KEAi_stop - t_KEAi_start}

    # Store dic_results to disk
    np.save(path_to_files + 'dic_results_dataset_{}.npy'.format(idx_dataset), dic_results)
setup.sh 0 → 100644
set -e
export BASEDIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" ; pwd -P)"
cd "${BASEDIR}"

# Create and source virtualenv
if [ -e "${BASEDIR}/venv/bin/activate" ]; then
    echo "using existing virtualenv"
else
    echo "creating virtualenv ..."
    virtualenv --python=python3 venv
fi
source venv/bin/activate

# Upgrade pip and install libraries (dependencies are also installed)
pip install --upgrade pip
pip install git+https://gitlab.mathematik.uni-stuttgart.de/pub/ians-anm/2l-vkoga@v0.1.2
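
# Usage sketch (an assumption, not part of the original script): run the script once
# from the repository root, then activate the environment in the current shell:
#   bash setup.sh
#   source venv/bin/activate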
import numpy as np


class Dataset():
    """
    Implementation of the test functions which were used in the experiments.
    """

    def __init__(self, N_points=10000):
        self.N_points = N_points
        self.X = None
        self.y = None

        self.dic_dataset = {
            'example_2d_franke': (lambda x: 0.75 * np.exp(-(9 * x[:, [0]] - 2) ** 2 / 4 - (9 * x[:, [1]] - 2) ** 2 / 4)
                                            + 0.75 * np.exp(-(9 * x[:, [0]] + 1) ** 2 / 49 - (9 * x[:, [1]] + 1) / 10)
                                            + 0.5 * np.exp(-(9 * x[:, [0]] - 7) ** 2 / 4 - (9 * x[:, [1]] - 3) ** 2 / 4)
                                            - 0.2 * np.exp(-(9 * x[:, [0]] - 4) ** 2 - (9 * x[:, [1]] - 7) ** 2), 2),
            'example_F2': (lambda x: 1/9 * (np.tanh(9 * x[:, [1]] - 9 * x[:, [0]]) + 1), 2),
            'example_F3': (lambda x: (125 / 100 + np.cos(5.4 * x[:, [1]])) / (6 * (1 + (3 * x[:, [0]] - 1)**2)), 2),
            'example_F4': (lambda x: 1/3 * np.exp(-81/16 * ((x[:, [0]] - 1/2)**2 + (x[:, [1]] - 1/2)**2)), 2),
            'example_5d_faster_conv': (lambda x: np.exp(-4 * np.sum(x[:, :] - .5 * np.ones_like(x[:, :]), axis=1, keepdims=True) ** 2), 5),
            'example_6d_kink': (lambda x: (np.exp(-4 * np.sum((x[:, :] - .5 * np.ones_like(x[:, :])) ** 2, axis=1, keepdims=True)) + 2 * np.abs(x[:, [0]] - .5)), 6),
        }

    def get_data(self, name_dataset):
        X, y = None, None
        if name_dataset in self.dic_dataset.keys():
            fcn, dim = self.dic_dataset[name_dataset]
            X = np.random.rand(self.N_points, dim)
            y = fcn(X)
        return X, y
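
# Illustrative usage sketch (not part of the original file): fetch one of the test
# functions defined in dic_dataset above and sample it on uniform random points.
if __name__ == '__main__':
    data = Dataset(N_points=500)
    X, y = data.get_data('example_2d_franke')
    print(X.shape, y.shape)  # expected: (500, 2) (500, 1)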
# Low-effort implementation of ERBA. Mind that this is a very basic implementation suited to the needs of this experiment,
# and not optimized for speed.
from vkoga_2L.kernels import Gaussian
import numpy as np
from sklearn.base import BaseEstimator
from sklearn.utils.validation import check_X_y, check_array, check_is_fitted
from scipy.spatial import distance_matrix
from datetime import datetime
# ERBA implementation
class ERBA(BaseEstimator):
    def __init__(self, kernel=Gaussian(), kernel_par=1,
                 verbose=True, n_report=10,
                 greedy_type='p_greedy',
                 tol_f=1e-10, tol_p=1e-10):
        # Set the verbosity on/off
        self.verbose = verbose

        # Set the frequency of report
        self.n_report = n_report

        # Set the params defining the method
        self.kernel = kernel
        self.kernel_par = kernel_par

        # Define the greedy method properly
        self.greedy_type = greedy_type
        assert self.greedy_type == 'f_greedy', 'Only f_greedy is implemented so far!'

        # Set the stopping values
        self.tol_f = tol_f
        self.tol_p = tol_p

        # Initialize the convergence history
        self.train_hist = {}
        self.train_hist['n'] = []
        self.train_hist['f'] = []
        self.train_hist['f val'] = []
    def fit(self, X, y, X_val=None, y_val=None, maxIter=None):
        # Check the datasets
        X, y = check_X_y(X, y, multi_output=True)
        if len(y.shape) == 1:
            y = y[:, None]

        # Get the data dimension
        N, q = y.shape

        # Set maxIter
        if maxIter is None or maxIter > N:
            self.maxIter = 100
        else:
            self.maxIter = maxIter

        # Initialize list for the chosen and non-chosen indices
        indI = list(range(N))
        notIndI = []

        # Iterative removal of points
        for idx_iter in range(self.maxIter):
            # Compute kernel model
            A = self.kernel.eval(X[indI, :], X[indI, :])
            invA = np.linalg.inv(A)
            coeff = np.linalg.solve(A, y[indI, :])

            # Compute current training and test error
            y_train_pred = self.kernel.eval(X, X[indI, :]) @ coeff
            error_train = np.max(np.sum((y - y_train_pred) ** 2, axis=1))
            self.train_hist['f'].append(error_train)
            if X_val is not None:
                y_val_pred = self.kernel.eval(X_val, X[indI, :]) @ coeff
                error_val = np.max(np.sum((y_val - y_val_pred) ** 2, axis=1))
                self.train_hist['f val'].append(error_val)

            # Compute error criterion for f-greedy: Rippa's leave-one-out residuals c_i / (A^{-1})_{ii}
            array_error = np.abs(coeff / np.diag(invA).reshape(-1, 1))  # absolute value is important here!!
            # Remove the point with the smallest leave-one-out error
            idx = np.argmin(array_error)
            notIndI.append(indI[idx])
            indI.pop(idx)

            if idx_iter % 10 == 0 and self.verbose:
                print(datetime.now().strftime("%H:%M:%S"), 'Calculation for idx_iter={} finished.'.format(idx_iter))

        # Save the final indices
        self.indI = indI

        return self
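
# Illustrative usage sketch (not part of the original file), assuming the vkoga_2L
# package from setup.sh is installed: start from 100 random points and let ERBA
# remove 50 of them via the LOOCV-based f-greedy rule; model.indI then holds the
# indices of the surviving points.
if __name__ == '__main__':
    X = np.random.rand(100, 2)
    y = X[:, [0]]**2 + X[:, [1]]**2
    model = ERBA(greedy_type='f_greedy', verbose=False)
    model.fit(X, y, maxIter=50)
    print('{} points remain'.format(len(model.indI)))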
# Implementation of KEA. The implementation is similar to VKOGA.
import time
import numpy as np
from sklearn.base import BaseEstimator
from sklearn.utils.validation import check_X_y, check_array, check_is_fitted
from vkoga_2L.vkoga_2L import VKOGA_2L
from vkoga_2L.kernels import Gaussian
# KEA implementation
class KEA(BaseEstimator):
    '''
    Implementation of KEA: Kernel Exchange Algorithm.
    '''

    def __init__(self, X_ctrs, y_ctrs, kernel=Gaussian(), kernel_par=1,
                 verbose=True, n_report=10,
                 exchange_type='p_exchange', reg_par=0,
                 tol_f=1e-10, tol_p=1e-10, beta=None):
        # Initial set of centers and corresponding values (duplicates are removed)
        X_ctrs, y_ctrs = check_X_y(X_ctrs, y_ctrs, multi_output=True)  # ToDo: Think about multi output
        self.X_ctrs, indices = np.unique(X_ctrs, axis=0, return_index=True)
        self.y_ctrs = y_ctrs[indices, :]

        # Set the verbosity on/off
        self.verbose = verbose

        # Set the frequency of report
        self.n_report = n_report

        # Set the params defining the method
        self.kernel = kernel
        self.kernel_par = kernel_par
        self.exchange_type = exchange_type
        self.reg_par = reg_par

        # Define the exchange method properly
        if beta is None:
            self.exchange_type = exchange_type
            if self.exchange_type == 'f_exchange':
                self.beta = 1.0
            elif self.exchange_type == 'p_exchange':
                self.beta = 0.0
            elif self.exchange_type == 'random':
                self.beta = 1.0  # use any value, should not matter!
        else:
            self.beta = float(beta)
            if self.beta == 0.0:
                self.exchange_type = 'p_exchange'
            elif self.beta == 1.0:
                self.exchange_type = 'f_exchange'
            elif self.beta == np.inf:
                self.exchange_type = 'fp_exchange'
            else:
                self.exchange_type = '{}'.format(beta)

        # ToDo: The regularization parameter was never considered carefully ..
        assert reg_par == 0, 'reg_par > 0 might not be implemented correctly!'

        self.flag_val = None

        # Set the stopping values
        self.tol_f = tol_f
        self.tol_p = tol_p

        # Some further settings
        self.ctrs_ = None
        self.Cut_ = None
        self.c = None

        # Initialize the convergence history
        self.train_hist_add = {}
        self.train_hist_add['f'] = []
        self.train_hist_add['p'] = []
        self.train_hist_add['n'] = []
        self.train_hist_remove = {}
        self.train_hist_remove['f'] = []
        self.train_hist_remove['p'] = []
    def selection_rule_add(self, f, p):
        # Selection rule for adding centers
        f = np.sum(f ** 2, axis=1)
        f_max = np.max(f)
        p_max = np.max(p)
        if self.exchange_type == 'f_exchange':
            idx = np.argmax(f)
        elif self.exchange_type == 'fp_exchange':
            idx = np.argmax(f / p)
        elif self.exchange_type == 'p_exchange':
            idx = np.argmax(p)
        elif self.exchange_type == 'random':  # pick some random point - David Holzmüller asked me about its performance
            idx = np.random.randint(len(p))
        else:
            idx = np.argmax(f ** self.beta * p ** (1 - self.beta))
        return idx, f_max, p_max

    def selection_rule_remove(self, f, p):
        # Selection rule for removing centers
        # Compute leave-one-out cross validation errors via Rippa's formula:
        # residual c_i / (A^{-1})_{ii}, power function value 1 / sqrt((A^{-1})_{ii})
        loocv_res1 = np.abs(self.coef_ / np.diag(self.invA)[:, None])
        loocv_pow1 = 1 / np.sqrt(np.diag(self.invA)).reshape(-1, 1)
        f = np.sum(f ** 2, axis=1)
        f_max = np.max(f)
        p_max = np.max(p)
        if self.exchange_type == 'f_exchange':  # use minimal loocv error to select point for exchange!
            idx = np.argmin(loocv_res1)
        elif self.exchange_type == 'fp_exchange':
            idx = np.argmin(loocv_res1 / loocv_pow1)
        elif self.exchange_type == 'p_exchange':
            idx = np.argmin(loocv_pow1)
        elif self.exchange_type == 'random':  # pick some random point - David Holzmüller asked me about its performance
            idx = np.random.randint(len(p))
        else:
            idx = np.argmin(loocv_res1 ** self.beta * loocv_pow1 ** (1 - self.beta))
        return idx, f_max, p_max
    def fit(self, X, y, maxExch=100, flag_debug=False, N_flexibility=0, flag_best_model=False):
        self.t0 = time.time()

        # We assume that we start with an initial set of centers and corresponding values, i.e. X_ctrs, y_ctrs
        X_ctrs = self.X_ctrs
        y_ctrs = self.y_ctrs

        # Make sure that self.X_ctrs is included in X - otherwise add it
        X, indices = np.unique(X, axis=0, return_index=True)  # remove duplicates
        y = y[indices, :]
        X = np.concatenate((X_ctrs, X), axis=0)
        y = np.concatenate((y_ctrs, y), axis=0)
        X, indices_index, indices_inverse = \
            np.unique(X, axis=0, return_index=True, return_inverse=True)
        y = y[indices_index, :]
        indI = list(indices_inverse[:X_ctrs.shape[0] - N_flexibility])  # remove N_flexibility additional centers

        # Introduce a suitable ordering into indI to save computational time later on
        model_greedy = VKOGA_2L(kernel=self.kernel, beta=self.beta, verbose=False,
                                tol_p=1e-12, tol_f=1e-12)
        _ = model_greedy.fit(X[indI, :], y[indI, :], maxIter=len(indI))
        # assert model_greedy.ctrs_.shape[0] == len(indI), 'model_greedy did not pick all centers!'
        if model_greedy.ctrs_.shape[0] != len(indI):
            print('Watch out, model_greedy did not pick all centers (probably some points are too close to each other)!')
        indI = [indI[idx] for idx in model_greedy.indI_]  # ToDo: Check this ordering whether everything worked out

        # Now we can forget about X_ctrs and y_ctrs, simply use X, y and track indI and notIndI
        del self.X_ctrs, X_ctrs
        del self.y_ctrs, y_ctrs

        # Set up stuff & get predictions to directly jump into loop
        self.ctrs_ = X[indI, :]
        self.N = self.ctrs_.shape[0]
        self.c = model_greedy.c
        self.Cut_ = model_greedy.Cut_
        self.invA = self.Cut_.T @ self.Cut_
        self.coef_ = self.invA @ y[indI, :]

        # Get predictions - here we really need to call .predict which is quite expensive
        p_pred = self.predict_P(X)
        y_pred = self.predict(X)

        list_idx_add = []
        flag_break = False
        list_indI = [indI.copy()]
        for idx_iter in range(maxExch + N_flexibility):
            if flag_break:
                break

            ## Select centers to be added and to be removed
            self.train_hist_add['f'].append([])
            self.train_hist_add['p'].append([])
            self.train_hist_remove['f'].append([])
            self.train_hist_remove['p'].append([])

            # Selection criterion for adding
            idx_add, self.train_hist_add['f'][-1], self.train_hist_add['p'][-1] = \
                self.selection_rule_add(y - y_pred, p_pred)  # use p_pred[notIndI] instead of p_pred?
            indI.append(idx_add)
            list_idx_add.append(idx_add)

            # Selection criterion for removal - only remove in the first maxExch iterations
            idx_remove, self.train_hist_remove['f'][-1], self.train_hist_remove['p'][-1] = \
                self.selection_rule_remove(y - y_pred, p_pred)  # use p_pred[notIndI] instead of p_pred?
            indI.pop(idx_remove)  # remove the idx_remove-th index

            # Recompute the kernel model for the exchanged center set
            self.ctrs_ = X[indI, :]
            self.A = self.kernel.eval(self.ctrs_, self.ctrs_)
            self.invA = np.linalg.inv(self.A)
            self.coef_ = self.invA @ y[indI, :]
            self.Cut_ = np.linalg.inv(np.linalg.cholesky(self.A))
            p_pred = self.predict_P(X)
            y_pred = self.predict(X)

            list_indI.append(indI.copy())

            # Stop if the exchange runs into a cycle of added indices
            for len_loop in [1, 2, 3, 4, 5]:
                if idx_iter > len_loop:
                    if list_idx_add[-len_loop:] == list_idx_add[-2 * len_loop:-len_loop]:
                        # print('loop of length {} detected!'.format(len_loop))
                        flag_break = True
                        break

        # If desired, pick the best model
        if flag_best_model:
            idx_best_kea = np.argmin(self.train_hist_add['f'])
            indI = list_indI[idx_best_kea]

        # Add the N_flexibility many centers again
        model_final_add = VKOGA_2L(kernel=self.kernel, beta=self.beta, verbose=False, tol_f=1e-14, tol_p=1e-14)
        model_final_add.fit(X[indI, :], y[indI, :], maxIter=len(indI))  # use the existing centers
        indI = [indI[idx] for idx in model_final_add.indI_]
        notIndI = [idx for idx in range(X.shape[0]) if idx not in indI]  # required because vkoga removes previously chosen points
        if N_flexibility > 0:  # add N_flexibility many centers (if N_flexibility > 0)
            model_final_add.fit(X[notIndI, :], y[notIndI], maxIter=N_flexibility)
            indI = indI + [notIndI[idx] for idx in model_final_add.indI_]

        # Store these results
        self.ctrs_ = model_final_add.ctrs_
        self.c = model_final_add.c
        self.Cut_ = model_final_add.Cut_
        self.invA = self.Cut_.T @ self.Cut_  # not required
        self.coef_ = self.invA @ y[indI, :]

        # Store some quantities to access intermediate centers
        self.list_indI = list_indI
        self._X = X

        # Set some final quantities
        self.indI = indI
        self.notIndI = notIndI
        self.X_ctrs = X[indI, :]
        self.y_ctrs = y[indI, :]

        return self
    def predict(self, X):
        # Check if fit has been called
        check_is_fitted(self, 'coef_')

        # Validate the input
        X = check_array(X)

        # Compute prediction
        if self.ctrs_.shape[0] > 0:
            prediction = self.kernel.eval(X, self.ctrs_) @ self.coef_
        else:
            prediction = np.zeros((X.shape[0], 1))
        return prediction

    def predict_P(self, X, n=None):
        # Prediction of the power function, copied from pgreedy.py
        # Try to do nothing
        if self.ctrs_ is None or n == 0:
            return self.kernel.diagonal(X)

        # Otherwise check if everything is ok
        # Check if fit has been called
        check_is_fitted(self, 'ctrs_')

        # Validate the input
        X = check_array(X)

        # Decide how many centers to use
        if n is None:
            n = np.atleast_2d(self.ctrs_).shape[0]

        # Evaluate the power function on the input
        p = self.kernel.diagonal(X) - np.sum(
            (self.kernel.eval(X, np.atleast_2d(self.ctrs_)[:n]) @ self.Cut_[:n, :n].transpose()) ** 2, axis=1)
        return p
    def print_message(self, when):
        # Note: copied from VKOGA; some attributes referenced here (restr_par, train_hist,
        # maxIter) are never set by KEA, and this helper is not called in the experiments.
        if self.verbose and when == 'begin':
            print('')
            print('*' * 30 + ' [KEA] ' + '*' * 30)
            print('Training model with')
            print(' |_ kernel : %s' % self.kernel)
            print(' |_ regularization par. : %2.2e' % self.reg_par)
            print(' |_ restriction par. : %2.2e' % self.restr_par)
            print('')
        if self.verbose and when == 'end':
            print('Training completed with')
            print(' |_ selected points : %8d / %8d' % (self.train_hist_add['n'][-1], self.ctrs_.shape[0] + self.maxIter))
            if self.flag_val:
                print(' |_ train, val residual : %2.2e / %2.2e, %2.2e' %
                      (self.train_hist['f'][-1], self.tol_f, self.train_hist['f val'][-1]))
                print(' |_ train, val power fun: %2.2e / %2.2e, %2.2e' %
                      (self.train_hist['p'][-1], self.tol_p, self.train_hist['p val'][-1]))
            else:
                print(' |_ train residual : %2.2e / %2.2e' % (self.train_hist['f'][-1], self.tol_f))
                print(' |_ train power fun : %2.2e / %2.2e' % (self.train_hist['p'][-1], self.tol_p))
        if self.verbose and when == 'track':
            print('Training ongoing with')
            print(' |_ selected points : %8d / %8d' % (self.train_hist_add['n'][-1], self.ctrs_.shape[0] + self.maxIter))
            if self.flag_val:
                print(' |_ train, val residual : %2.2e / %2.2e, %2.2e' %
                      (self.train_hist['f'][-1], self.tol_f, self.train_hist['f val'][-1]))
                print(' |_ train, val power fun: %2.2e / %2.2e, %2.2e' %
                      (self.train_hist['p'][-1], self.tol_p, self.train_hist['p val'][-1]))
            else:
                print(' |_ train residual : %2.2e / %2.2e' % (self.train_hist['f'][-1], self.tol_f))
                print(' |_ train power fun : %2.2e / %2.2e' % (self.train_hist['p'][-1], self.tol_p))
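
# Illustrative usage sketch (not part of the original file), assuming the vkoga_2L
# package from setup.sh is installed: initialize KEA with the first 20 points as
# centers and let it exchange them against better candidates from the full set X.
if __name__ == '__main__':
    X = np.random.rand(300, 2)
    y = X[:, [0]]**2 + X[:, [1]]**2
    model = KEA(X_ctrs=X[:20, :], y_ctrs=y[:20, :],
                exchange_type='f_exchange', verbose=False)
    model.fit(X, y, maxExch=20, flag_best_model=True)
    print('final centers:', model.X_ctrs.shape)  # still 20 centers, but exchanged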