From 92ae582217f303a62d7eef916367a2be9ddbd28a Mon Sep 17 00:00:00 2001
From: "Oh, Sojung" <sojung.oh@studium.uni-hamburg.de>
Date: Fri, 5 Apr 2024 00:01:55 +0000
Subject: [PATCH] Upload New File

---
 ...arket_4models_RF_GBT_RFE_RF_RFE_GBT_.ipynb | 2463 +++++++++++++++++
 1 file changed, 2463 insertions(+)
 create mode 100644 Variable Selection/final_Variable_selection_10var_for_weekendmarket_4models_RF_GBT_RFE_RF_RFE_GBT_.ipynb

diff --git a/Variable Selection/final_Variable_selection_10var_for_weekendmarket_4models_RF_GBT_RFE_RF_RFE_GBT_.ipynb b/Variable Selection/final_Variable_selection_10var_for_weekendmarket_4models_RF_GBT_RFE_RF_RFE_GBT_.ipynb
new file mode 100644
index 0000000..a29a930
--- /dev/null
+++ b/Variable Selection/final_Variable_selection_10var_for_weekendmarket_4models_RF_GBT_RFE_RF_RFE_GBT_.ipynb	
@@ -0,0 +1,2463 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 52,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd \n",
+    "import numpy as np \n",
+    "import matplotlib.pyplot as plt\n",
+    "\n",
+    "# Raise the pandas display limit so wide DataFrames show up to 68 columns\n",
+    "pd.set_option('display.max_columns', 68)\n",
+    "#pd.set_option('display.max_rows', 101)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Variables selected in the prior discussion (including Stadtteilprofile and tax data)."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 53,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>Anzahl der Einwohner: innen</th>\n",
+       "      <th>Anzahl der Kinder und Jugendlichen unter 18 Jahren</th>\n",
+       "      <th>Anteil Kinder und Jugendlicher unter 18 Jahren an der Gesamt-bevölkerung</th>\n",
+       "      <th>Anzahl älterer Einwohner: innen über 64 Jahren</th>\n",
+       "      <th>Anteil älterer Einwohner: innen über 64 Jahren an der Gesamt-bevölkerung</th>\n",
+       "      <th>Anzahl der Haushalte</th>\n",
+       "      <th>Durch-schnittliche Anzahl der Personen je Haushalt</th>\n",
+       "      <th>Fläche in km²</th>\n",
+       "      <th>Einwohner: innen je km²</th>\n",
+       "      <th>Sozial-versicherungs-pflichtig Beschäftigte am Wohnort</th>\n",
+       "      <th>Anteil sozial-versicherungs-pflichtig Beschäftigter am Wohnort an den Erwerbs-fähigen (15 bis unter 65-Jährige)</th>\n",
+       "      <th>Anzahl der Arbeitslosen</th>\n",
+       "      <th>Anzahl der Wohngebäude</th>\n",
+       "      <th>Anzahl der Wohnungen</th>\n",
+       "      <th>Durch-schnittliche Wohnungs-größe in m²</th>\n",
+       "      <th>Durch-schnittliche Wohnfläche je Einwohner:in in m²</th>\n",
+       "      <th>Anzahl der Sozial-wohnungen</th>\n",
+       "      <th>Anteil der Sozial-wohnungen an allen Wohnungen</th>\n",
+       "      <th>Durch-schnittlicher Immobilien-preis für ein Grundstück in EUR/m²</th>\n",
+       "      <th>Durch-schnittlicher Immobilien-preis für eine Eigentums-wohnung in EUR/m²</th>\n",
+       "      <th>Anzahl der Wohnungen in Ein- und Zweifamilien-häusern</th>\n",
+       "      <th>Anteil der Wohnungen in Ein- und Zweifamilien-häusern an allen Wohnungen</th>\n",
+       "      <th>Anzahl der Einpersonen-haushalte</th>\n",
+       "      <th>Anteil der Haushalte, in denen nur eine Person lebt, an allen Haushalten</th>\n",
+       "      <th>Gesamtbetrag der Einkünfte - [Steuerpflichtig]</th>\n",
+       "      <th>Gesamtbetrag der Einkünfte - [1000€]</th>\n",
+       "      <th>Festgesetzte Einkommenssteuer/ Jahreslohnsteuer - [1000€]</th>\n",
+       "      <th>Gesamtbetrag Einkünfte Mittelwert - [€]</th>\n",
+       "      <th>Gesamtbetrag Einkünfte Median - [€]</th>\n",
+       "      <th>market_count</th>\n",
+       "      <th>farms_count</th>\n",
+       "      <th>greencrocers_count</th>\n",
+       "      <th>supermarkets_count</th>\n",
+       "      <th>biosupermarkets_count</th>\n",
+       "      <th>all_restaurants_count</th>\n",
+       "      <th>organic_restaurants_count</th>\n",
+       "      <th>vegan_restaurants_count</th>\n",
+       "      <th>art_score</th>\n",
+       "      <th>distance_rathaus</th>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>stadtteil</th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>Hamburg-Altstadt</th>\n",
+       "      <td>3182.0</td>\n",
+       "      <td>515.0</td>\n",
+       "      <td>16.2</td>\n",
+       "      <td>316.0</td>\n",
+       "      <td>9.9</td>\n",
+       "      <td>1884.0</td>\n",
+       "      <td>1.7</td>\n",
+       "      <td>1.3</td>\n",
+       "      <td>2447.0</td>\n",
+       "      <td>1346.0</td>\n",
+       "      <td>55.6</td>\n",
+       "      <td>324.0</td>\n",
+       "      <td>103.0</td>\n",
+       "      <td>1487.0</td>\n",
+       "      <td>74.1</td>\n",
+       "      <td>34.6</td>\n",
+       "      <td>176.0</td>\n",
+       "      <td>11.8</td>\n",
+       "      <td>2366.0</td>\n",
+       "      <td>4869.0</td>\n",
+       "      <td>17.0</td>\n",
+       "      <td>1.1</td>\n",
+       "      <td>1057.0</td>\n",
+       "      <td>56.1</td>\n",
+       "      <td>1952.0</td>\n",
+       "      <td>61168.0</td>\n",
+       "      <td>11577.0</td>\n",
+       "      <td>31336.0</td>\n",
+       "      <td>10811.0</td>\n",
+       "      <td>2</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>4</td>\n",
+       "      <td>0</td>\n",
+       "      <td>129</td>\n",
+       "      <td>0</td>\n",
+       "      <td>4</td>\n",
+       "      <td>2.565476</td>\n",
+       "      <td>0.000000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>HafenCity</th>\n",
+       "      <td>6950.0</td>\n",
+       "      <td>1386.0</td>\n",
+       "      <td>19.9</td>\n",
+       "      <td>644.0</td>\n",
+       "      <td>9.3</td>\n",
+       "      <td>3183.0</td>\n",
+       "      <td>2.2</td>\n",
+       "      <td>2.4</td>\n",
+       "      <td>2865.0</td>\n",
+       "      <td>3087.0</td>\n",
+       "      <td>61.6</td>\n",
+       "      <td>147.0</td>\n",
+       "      <td>141.0</td>\n",
+       "      <td>3898.0</td>\n",
+       "      <td>81.4</td>\n",
+       "      <td>45.7</td>\n",
+       "      <td>1074.0</td>\n",
+       "      <td>27.6</td>\n",
+       "      <td>3031.0</td>\n",
+       "      <td>10746.0</td>\n",
+       "      <td>5.0</td>\n",
+       "      <td>0.1</td>\n",
+       "      <td>1126.0</td>\n",
+       "      <td>35.4</td>\n",
+       "      <td>1255.0</td>\n",
+       "      <td>116973.0</td>\n",
+       "      <td>34051.0</td>\n",
+       "      <td>93206.0</td>\n",
+       "      <td>57913.0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>6</td>\n",
+       "      <td>1</td>\n",
+       "      <td>56</td>\n",
+       "      <td>3</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1.952381</td>\n",
+       "      <td>0.005758</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>Neustadt</th>\n",
+       "      <td>12649.0</td>\n",
+       "      <td>1412.0</td>\n",
+       "      <td>11.2</td>\n",
+       "      <td>1919.0</td>\n",
+       "      <td>15.2</td>\n",
+       "      <td>8683.0</td>\n",
+       "      <td>1.5</td>\n",
+       "      <td>2.3</td>\n",
+       "      <td>5592.0</td>\n",
+       "      <td>6350.0</td>\n",
+       "      <td>66.9</td>\n",
+       "      <td>493.0</td>\n",
+       "      <td>652.0</td>\n",
+       "      <td>7700.0</td>\n",
+       "      <td>63.1</td>\n",
+       "      <td>38.4</td>\n",
+       "      <td>992.0</td>\n",
+       "      <td>12.9</td>\n",
+       "      <td>2304.0</td>\n",
+       "      <td>8240.0</td>\n",
+       "      <td>70.0</td>\n",
+       "      <td>0.9</td>\n",
+       "      <td>5994.0</td>\n",
+       "      <td>69.0</td>\n",
+       "      <td>7015.0</td>\n",
+       "      <td>242164.0</td>\n",
+       "      <td>46861.0</td>\n",
+       "      <td>34521.0</td>\n",
+       "      <td>24715.0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>2</td>\n",
+       "      <td>6</td>\n",
+       "      <td>1</td>\n",
+       "      <td>140</td>\n",
+       "      <td>4</td>\n",
+       "      <td>4</td>\n",
+       "      <td>2.702381</td>\n",
+       "      <td>0.001174</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>St. Pauli</th>\n",
+       "      <td>22056.0</td>\n",
+       "      <td>2941.0</td>\n",
+       "      <td>13.3</td>\n",
+       "      <td>2270.0</td>\n",
+       "      <td>10.3</td>\n",
+       "      <td>14772.0</td>\n",
+       "      <td>1.5</td>\n",
+       "      <td>2.2</td>\n",
+       "      <td>9836.0</td>\n",
+       "      <td>9903.0</td>\n",
+       "      <td>57.2</td>\n",
+       "      <td>1535.0</td>\n",
+       "      <td>1293.0</td>\n",
+       "      <td>12667.0</td>\n",
+       "      <td>64.2</td>\n",
+       "      <td>36.9</td>\n",
+       "      <td>1522.0</td>\n",
+       "      <td>12.0</td>\n",
+       "      <td>1998.0</td>\n",
+       "      <td>7716.0</td>\n",
+       "      <td>173.0</td>\n",
+       "      <td>1.4</td>\n",
+       "      <td>10184.0</td>\n",
+       "      <td>68.9</td>\n",
+       "      <td>11066.0</td>\n",
+       "      <td>309596.0</td>\n",
+       "      <td>55589.0</td>\n",
+       "      <td>27977.0</td>\n",
+       "      <td>19399.0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>10</td>\n",
+       "      <td>0</td>\n",
+       "      <td>109</td>\n",
+       "      <td>1</td>\n",
+       "      <td>2</td>\n",
+       "      <td>1.851190</td>\n",
+       "      <td>0.009631</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>St. Georg</th>\n",
+       "      <td>12318.0</td>\n",
+       "      <td>1420.0</td>\n",
+       "      <td>11.5</td>\n",
+       "      <td>1676.0</td>\n",
+       "      <td>13.6</td>\n",
+       "      <td>7720.0</td>\n",
+       "      <td>1.6</td>\n",
+       "      <td>1.8</td>\n",
+       "      <td>6758.0</td>\n",
+       "      <td>5424.0</td>\n",
+       "      <td>57.6</td>\n",
+       "      <td>659.0</td>\n",
+       "      <td>561.0</td>\n",
+       "      <td>6444.0</td>\n",
+       "      <td>71.1</td>\n",
+       "      <td>37.2</td>\n",
+       "      <td>764.0</td>\n",
+       "      <td>11.9</td>\n",
+       "      <td>1996.0</td>\n",
+       "      <td>7961.0</td>\n",
+       "      <td>63.0</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>5043.0</td>\n",
+       "      <td>65.3</td>\n",
+       "      <td>5683.0</td>\n",
+       "      <td>250742.0</td>\n",
+       "      <td>58371.0</td>\n",
+       "      <td>44121.0</td>\n",
+       "      <td>27161.0</td>\n",
+       "      <td>2</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>11</td>\n",
+       "      <td>0</td>\n",
+       "      <td>80</td>\n",
+       "      <td>2</td>\n",
+       "      <td>4</td>\n",
+       "      <td>2.523810</td>\n",
+       "      <td>0.008493</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>...</th>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>Hausbruch</th>\n",
+       "      <td>16868.0</td>\n",
+       "      <td>3196.0</td>\n",
+       "      <td>18.9</td>\n",
+       "      <td>3554.0</td>\n",
+       "      <td>21.1</td>\n",
+       "      <td>7829.0</td>\n",
+       "      <td>2.1</td>\n",
+       "      <td>9.8</td>\n",
+       "      <td>1715.0</td>\n",
+       "      <td>6492.0</td>\n",
+       "      <td>60.8</td>\n",
+       "      <td>758.0</td>\n",
+       "      <td>3053.0</td>\n",
+       "      <td>7323.0</td>\n",
+       "      <td>85.0</td>\n",
+       "      <td>36.9</td>\n",
+       "      <td>1030.0</td>\n",
+       "      <td>14.1</td>\n",
+       "      <td>541.0</td>\n",
+       "      <td>3877.0</td>\n",
+       "      <td>2793.0</td>\n",
+       "      <td>38.1</td>\n",
+       "      <td>3134.0</td>\n",
+       "      <td>40.0</td>\n",
+       "      <td>7349.0</td>\n",
+       "      <td>227990.0</td>\n",
+       "      <td>36179.0</td>\n",
+       "      <td>31023.0</td>\n",
+       "      <td>21355.0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0.422619</td>\n",
+       "      <td>0.104116</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>Neugraben-Fischbek</th>\n",
+       "      <td>33963.0</td>\n",
+       "      <td>7480.0</td>\n",
+       "      <td>22.0</td>\n",
+       "      <td>6318.0</td>\n",
+       "      <td>18.6</td>\n",
+       "      <td>15602.0</td>\n",
+       "      <td>2.2</td>\n",
+       "      <td>22.5</td>\n",
+       "      <td>1508.0</td>\n",
+       "      <td>12746.0</td>\n",
+       "      <td>59.9</td>\n",
+       "      <td>1493.0</td>\n",
+       "      <td>6669.0</td>\n",
+       "      <td>14755.0</td>\n",
+       "      <td>87.0</td>\n",
+       "      <td>37.8</td>\n",
+       "      <td>1078.0</td>\n",
+       "      <td>7.3</td>\n",
+       "      <td>554.0</td>\n",
+       "      <td>3912.0</td>\n",
+       "      <td>6124.0</td>\n",
+       "      <td>41.5</td>\n",
+       "      <td>6247.0</td>\n",
+       "      <td>40.0</td>\n",
+       "      <td>12290.0</td>\n",
+       "      <td>382231.0</td>\n",
+       "      <td>60244.0</td>\n",
+       "      <td>31101.0</td>\n",
+       "      <td>22492.0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>4</td>\n",
+       "      <td>1</td>\n",
+       "      <td>8</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1.857143</td>\n",
+       "      <td>0.130211</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>Francop</th>\n",
+       "      <td>736.0</td>\n",
+       "      <td>119.0</td>\n",
+       "      <td>16.2</td>\n",
+       "      <td>133.0</td>\n",
+       "      <td>18.1</td>\n",
+       "      <td>374.0</td>\n",
+       "      <td>2.0</td>\n",
+       "      <td>8.8</td>\n",
+       "      <td>84.0</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>208.0</td>\n",
+       "      <td>347.0</td>\n",
+       "      <td>98.5</td>\n",
+       "      <td>46.4</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>427.0</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>255.0</td>\n",
+       "      <td>73.5</td>\n",
+       "      <td>161.0</td>\n",
+       "      <td>43.0</td>\n",
+       "      <td>356.0</td>\n",
+       "      <td>12738.0</td>\n",
+       "      <td>2083.0</td>\n",
+       "      <td>35782.0</td>\n",
+       "      <td>26568.0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.114626</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>Neuenfelde</th>\n",
+       "      <td>5245.0</td>\n",
+       "      <td>1251.0</td>\n",
+       "      <td>23.9</td>\n",
+       "      <td>748.0</td>\n",
+       "      <td>14.3</td>\n",
+       "      <td>2337.0</td>\n",
+       "      <td>2.2</td>\n",
+       "      <td>15.7</td>\n",
+       "      <td>335.0</td>\n",
+       "      <td>1957.0</td>\n",
+       "      <td>57.1</td>\n",
+       "      <td>253.0</td>\n",
+       "      <td>1103.0</td>\n",
+       "      <td>2037.0</td>\n",
+       "      <td>95.4</td>\n",
+       "      <td>37.1</td>\n",
+       "      <td>309.0</td>\n",
+       "      <td>15.2</td>\n",
+       "      <td>401.0</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>1261.0</td>\n",
+       "      <td>61.9</td>\n",
+       "      <td>1033.0</td>\n",
+       "      <td>44.2</td>\n",
+       "      <td>1909.0</td>\n",
+       "      <td>62765.0</td>\n",
+       "      <td>10422.0</td>\n",
+       "      <td>32879.0</td>\n",
+       "      <td>22909.0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>2</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0.375000</td>\n",
+       "      <td>0.162791</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>Cranz</th>\n",
+       "      <td>810.0</td>\n",
+       "      <td>130.0</td>\n",
+       "      <td>16.0</td>\n",
+       "      <td>154.0</td>\n",
+       "      <td>19.0</td>\n",
+       "      <td>460.0</td>\n",
+       "      <td>1.8</td>\n",
+       "      <td>1.3</td>\n",
+       "      <td>608.0</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>170.0</td>\n",
+       "      <td>405.0</td>\n",
+       "      <td>87.5</td>\n",
+       "      <td>43.7</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>451.0</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>146.0</td>\n",
+       "      <td>36.0</td>\n",
+       "      <td>252.0</td>\n",
+       "      <td>54.8</td>\n",
+       "      <td>378.0</td>\n",
+       "      <td>11845.0</td>\n",
+       "      <td>2018.0</td>\n",
+       "      <td>31335.0</td>\n",
+       "      <td>22852.0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>2</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.200686</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>99 rows × 39 columns</p>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "                    Anzahl der Einwohner: innen  \\\n",
+       "stadtteil                                         \n",
+       "Hamburg-Altstadt                         3182.0   \n",
+       "HafenCity                                6950.0   \n",
+       "Neustadt                                12649.0   \n",
+       "St. Pauli                               22056.0   \n",
+       "St. Georg                               12318.0   \n",
+       "...                                         ...   \n",
+       "Hausbruch                               16868.0   \n",
+       "Neugraben-Fischbek                      33963.0   \n",
+       "Francop                                   736.0   \n",
+       "Neuenfelde                               5245.0   \n",
+       "Cranz                                     810.0   \n",
+       "\n",
+       "                    Anzahl der Kinder und Jugendlichen unter 18 Jahren  \\\n",
+       "stadtteil                                                                \n",
+       "Hamburg-Altstadt                                                515.0    \n",
+       "HafenCity                                                      1386.0    \n",
+       "Neustadt                                                       1412.0    \n",
+       "St. Pauli                                                      2941.0    \n",
+       "St. Georg                                                      1420.0    \n",
+       "...                                                               ...    \n",
+       "Hausbruch                                                      3196.0    \n",
+       "Neugraben-Fischbek                                             7480.0    \n",
+       "Francop                                                         119.0    \n",
+       "Neuenfelde                                                     1251.0    \n",
+       "Cranz                                                           130.0    \n",
+       "\n",
+       "                    Anteil Kinder und Jugendlicher unter 18 Jahren an der Gesamt-bevölkerung  \\\n",
+       "stadtteil                                                                                      \n",
+       "Hamburg-Altstadt                                                 16.2                          \n",
+       "HafenCity                                                        19.9                          \n",
+       "Neustadt                                                         11.2                          \n",
+       "St. Pauli                                                        13.3                          \n",
+       "St. Georg                                                        11.5                          \n",
+       "...                                                               ...                          \n",
+       "Hausbruch                                                        18.9                          \n",
+       "Neugraben-Fischbek                                               22.0                          \n",
+       "Francop                                                          16.2                          \n",
+       "Neuenfelde                                                       23.9                          \n",
+       "Cranz                                                            16.0                          \n",
+       "\n",
+       "                    Anzahl älterer Einwohner: innen über 64 Jahren  \\\n",
+       "stadtteil                                                            \n",
+       "Hamburg-Altstadt                                             316.0   \n",
+       "HafenCity                                                    644.0   \n",
+       "Neustadt                                                    1919.0   \n",
+       "St. Pauli                                                   2270.0   \n",
+       "St. Georg                                                   1676.0   \n",
+       "...                                                            ...   \n",
+       "Hausbruch                                                   3554.0   \n",
+       "Neugraben-Fischbek                                          6318.0   \n",
+       "Francop                                                      133.0   \n",
+       "Neuenfelde                                                   748.0   \n",
+       "Cranz                                                        154.0   \n",
+       "\n",
+       "                    Anteil älterer Einwohner: innen über 64 Jahren an der Gesamt-bevölkerung  \\\n",
+       "stadtteil                                                                                      \n",
+       "Hamburg-Altstadt                                                  9.9                          \n",
+       "HafenCity                                                         9.3                          \n",
+       "Neustadt                                                         15.2                          \n",
+       "St. Pauli                                                        10.3                          \n",
+       "St. Georg                                                        13.6                          \n",
+       "...                                                               ...                          \n",
+       "Hausbruch                                                        21.1                          \n",
+       "Neugraben-Fischbek                                               18.6                          \n",
+       "Francop                                                          18.1                          \n",
+       "Neuenfelde                                                       14.3                          \n",
+       "Cranz                                                            19.0                          \n",
+       "\n",
+       "                    Anzahl der Haushalte  \\\n",
+       "stadtteil                                  \n",
+       "Hamburg-Altstadt                  1884.0   \n",
+       "HafenCity                         3183.0   \n",
+       "Neustadt                          8683.0   \n",
+       "St. Pauli                        14772.0   \n",
+       "St. Georg                         7720.0   \n",
+       "...                                  ...   \n",
+       "Hausbruch                         7829.0   \n",
+       "Neugraben-Fischbek               15602.0   \n",
+       "Francop                            374.0   \n",
+       "Neuenfelde                        2337.0   \n",
+       "Cranz                              460.0   \n",
+       "\n",
+       "                    Durch-schnittliche Anzahl der Personen je Haushalt  \\\n",
+       "stadtteil                                                                \n",
+       "Hamburg-Altstadt                                                  1.7    \n",
+       "HafenCity                                                         2.2    \n",
+       "Neustadt                                                          1.5    \n",
+       "St. Pauli                                                         1.5    \n",
+       "St. Georg                                                         1.6    \n",
+       "...                                                               ...    \n",
+       "Hausbruch                                                         2.1    \n",
+       "Neugraben-Fischbek                                                2.2    \n",
+       "Francop                                                           2.0    \n",
+       "Neuenfelde                                                        2.2    \n",
+       "Cranz                                                             1.8    \n",
+       "\n",
+       "                    Fläche in km²  Einwohner: innen je km²  \\\n",
+       "stadtteil                                                    \n",
+       "Hamburg-Altstadt              1.3                   2447.0   \n",
+       "HafenCity                     2.4                   2865.0   \n",
+       "Neustadt                      2.3                   5592.0   \n",
+       "St. Pauli                     2.2                   9836.0   \n",
+       "St. Georg                     1.8                   6758.0   \n",
+       "...                           ...                      ...   \n",
+       "Hausbruch                     9.8                   1715.0   \n",
+       "Neugraben-Fischbek           22.5                   1508.0   \n",
+       "Francop                       8.8                     84.0   \n",
+       "Neuenfelde                   15.7                    335.0   \n",
+       "Cranz                         1.3                    608.0   \n",
+       "\n",
+       "                    Sozial-versicherungs-pflichtig Beschäftigte am Wohnort  \\\n",
+       "stadtteil                                                                    \n",
+       "Hamburg-Altstadt                                               1346.0        \n",
+       "HafenCity                                                      3087.0        \n",
+       "Neustadt                                                       6350.0        \n",
+       "St. Pauli                                                      9903.0        \n",
+       "St. Georg                                                      5424.0        \n",
+       "...                                                               ...        \n",
+       "Hausbruch                                                      6492.0        \n",
+       "Neugraben-Fischbek                                            12746.0        \n",
+       "Francop                                                           NaN        \n",
+       "Neuenfelde                                                     1957.0        \n",
+       "Cranz                                                             NaN        \n",
+       "\n",
+       "                    Anteil sozial-versicherungs-pflichtig Beschäftigter am Wohnort an den Erwerbs-fähigen (15 bis unter 65-Jährige)  \\\n",
+       "stadtteil                                                                                                                             \n",
+       "Hamburg-Altstadt                                                 55.6                                                                 \n",
+       "HafenCity                                                        61.6                                                                 \n",
+       "Neustadt                                                         66.9                                                                 \n",
+       "St. Pauli                                                        57.2                                                                 \n",
+       "St. Georg                                                        57.6                                                                 \n",
+       "...                                                               ...                                                                 \n",
+       "Hausbruch                                                        60.8                                                                 \n",
+       "Neugraben-Fischbek                                               59.9                                                                 \n",
+       "Francop                                                           NaN                                                                 \n",
+       "Neuenfelde                                                       57.1                                                                 \n",
+       "Cranz                                                             NaN                                                                 \n",
+       "\n",
+       "                    Anzahl der Arbeitslosen  Anzahl der Wohngebäude  \\\n",
+       "stadtteil                                                             \n",
+       "Hamburg-Altstadt                      324.0                   103.0   \n",
+       "HafenCity                             147.0                   141.0   \n",
+       "Neustadt                              493.0                   652.0   \n",
+       "St. Pauli                            1535.0                  1293.0   \n",
+       "St. Georg                             659.0                   561.0   \n",
+       "...                                     ...                     ...   \n",
+       "Hausbruch                             758.0                  3053.0   \n",
+       "Neugraben-Fischbek                   1493.0                  6669.0   \n",
+       "Francop                                 NaN                   208.0   \n",
+       "Neuenfelde                            253.0                  1103.0   \n",
+       "Cranz                                   NaN                   170.0   \n",
+       "\n",
+       "                    Anzahl der Wohnungen  \\\n",
+       "stadtteil                                  \n",
+       "Hamburg-Altstadt                  1487.0   \n",
+       "HafenCity                         3898.0   \n",
+       "Neustadt                          7700.0   \n",
+       "St. Pauli                        12667.0   \n",
+       "St. Georg                         6444.0   \n",
+       "...                                  ...   \n",
+       "Hausbruch                         7323.0   \n",
+       "Neugraben-Fischbek               14755.0   \n",
+       "Francop                            347.0   \n",
+       "Neuenfelde                        2037.0   \n",
+       "Cranz                              405.0   \n",
+       "\n",
+       "                    Durch-schnittliche Wohnungs-größe in m²  \\\n",
+       "stadtteil                                                     \n",
+       "Hamburg-Altstadt                                       74.1   \n",
+       "HafenCity                                              81.4   \n",
+       "Neustadt                                               63.1   \n",
+       "St. Pauli                                              64.2   \n",
+       "St. Georg                                              71.1   \n",
+       "...                                                     ...   \n",
+       "Hausbruch                                              85.0   \n",
+       "Neugraben-Fischbek                                     87.0   \n",
+       "Francop                                                98.5   \n",
+       "Neuenfelde                                             95.4   \n",
+       "Cranz                                                  87.5   \n",
+       "\n",
+       "                    Durch-schnittliche Wohnfläche je Einwohner:in in m²  \\\n",
+       "stadtteil                                                                 \n",
+       "Hamburg-Altstadt                                                 34.6     \n",
+       "HafenCity                                                        45.7     \n",
+       "Neustadt                                                         38.4     \n",
+       "St. Pauli                                                        36.9     \n",
+       "St. Georg                                                        37.2     \n",
+       "...                                                               ...     \n",
+       "Hausbruch                                                        36.9     \n",
+       "Neugraben-Fischbek                                               37.8     \n",
+       "Francop                                                          46.4     \n",
+       "Neuenfelde                                                       37.1     \n",
+       "Cranz                                                            43.7     \n",
+       "\n",
+       "                    Anzahl der Sozial-wohnungen  \\\n",
+       "stadtteil                                         \n",
+       "Hamburg-Altstadt                          176.0   \n",
+       "HafenCity                                1074.0   \n",
+       "Neustadt                                  992.0   \n",
+       "St. Pauli                                1522.0   \n",
+       "St. Georg                                 764.0   \n",
+       "...                                         ...   \n",
+       "Hausbruch                                1030.0   \n",
+       "Neugraben-Fischbek                       1078.0   \n",
+       "Francop                                     0.0   \n",
+       "Neuenfelde                                309.0   \n",
+       "Cranz                                       0.0   \n",
+       "\n",
+       "                    Anteil der Sozial-wohnungen an allen Wohnungen  \\\n",
+       "stadtteil                                                            \n",
+       "Hamburg-Altstadt                                              11.8   \n",
+       "HafenCity                                                     27.6   \n",
+       "Neustadt                                                      12.9   \n",
+       "St. Pauli                                                     12.0   \n",
+       "St. Georg                                                     11.9   \n",
+       "...                                                            ...   \n",
+       "Hausbruch                                                     14.1   \n",
+       "Neugraben-Fischbek                                             7.3   \n",
+       "Francop                                                        0.0   \n",
+       "Neuenfelde                                                    15.2   \n",
+       "Cranz                                                          0.0   \n",
+       "\n",
+       "                    Durch-schnittlicher Immobilien-preis für ein Grundstück in EUR/m²  \\\n",
+       "stadtteil                                                                               \n",
+       "Hamburg-Altstadt                                               2366.0                   \n",
+       "HafenCity                                                      3031.0                   \n",
+       "Neustadt                                                       2304.0                   \n",
+       "St. Pauli                                                      1998.0                   \n",
+       "St. Georg                                                      1996.0                   \n",
+       "...                                                               ...                   \n",
+       "Hausbruch                                                       541.0                   \n",
+       "Neugraben-Fischbek                                              554.0                   \n",
+       "Francop                                                         427.0                   \n",
+       "Neuenfelde                                                      401.0                   \n",
+       "Cranz                                                           451.0                   \n",
+       "\n",
+       "                    Durch-schnittlicher Immobilien-preis für eine Eigentums-wohnung in EUR/m²  \\\n",
+       "stadtteil                                                                                       \n",
+       "Hamburg-Altstadt                                               4869.0                           \n",
+       "HafenCity                                                     10746.0                           \n",
+       "Neustadt                                                       8240.0                           \n",
+       "St. Pauli                                                      7716.0                           \n",
+       "St. Georg                                                      7961.0                           \n",
+       "...                                                               ...                           \n",
+       "Hausbruch                                                      3877.0                           \n",
+       "Neugraben-Fischbek                                             3912.0                           \n",
+       "Francop                                                           NaN                           \n",
+       "Neuenfelde                                                        NaN                           \n",
+       "Cranz                                                             NaN                           \n",
+       "\n",
+       "                    Anzahl der Wohnungen in Ein- und Zweifamilien-häusern  \\\n",
+       "stadtteil                                                                   \n",
+       "Hamburg-Altstadt                                                 17.0       \n",
+       "HafenCity                                                         5.0       \n",
+       "Neustadt                                                         70.0       \n",
+       "St. Pauli                                                       173.0       \n",
+       "St. Georg                                                        63.0       \n",
+       "...                                                               ...       \n",
+       "Hausbruch                                                      2793.0       \n",
+       "Neugraben-Fischbek                                             6124.0       \n",
+       "Francop                                                         255.0       \n",
+       "Neuenfelde                                                     1261.0       \n",
+       "Cranz                                                           146.0       \n",
+       "\n",
+       "                    Anteil der Wohnungen in Ein- und Zweifamilien-häusern an allen Wohnungen  \\\n",
+       "stadtteil                                                                                      \n",
+       "Hamburg-Altstadt                                                  1.1                          \n",
+       "HafenCity                                                         0.1                          \n",
+       "Neustadt                                                          0.9                          \n",
+       "St. Pauli                                                         1.4                          \n",
+       "St. Georg                                                         1.0                          \n",
+       "...                                                               ...                          \n",
+       "Hausbruch                                                        38.1                          \n",
+       "Neugraben-Fischbek                                               41.5                          \n",
+       "Francop                                                          73.5                          \n",
+       "Neuenfelde                                                       61.9                          \n",
+       "Cranz                                                            36.0                          \n",
+       "\n",
+       "                    Anzahl der Einpersonen-haushalte  \\\n",
+       "stadtteil                                              \n",
+       "Hamburg-Altstadt                              1057.0   \n",
+       "HafenCity                                     1126.0   \n",
+       "Neustadt                                      5994.0   \n",
+       "St. Pauli                                    10184.0   \n",
+       "St. Georg                                     5043.0   \n",
+       "...                                              ...   \n",
+       "Hausbruch                                     3134.0   \n",
+       "Neugraben-Fischbek                            6247.0   \n",
+       "Francop                                        161.0   \n",
+       "Neuenfelde                                    1033.0   \n",
+       "Cranz                                          252.0   \n",
+       "\n",
+       "                    Anteil der Haushalte, in denen nur eine Person lebt, an allen Haushalten  \\\n",
+       "stadtteil                                                                                      \n",
+       "Hamburg-Altstadt                                                 56.1                          \n",
+       "HafenCity                                                        35.4                          \n",
+       "Neustadt                                                         69.0                          \n",
+       "St. Pauli                                                        68.9                          \n",
+       "St. Georg                                                        65.3                          \n",
+       "...                                                               ...                          \n",
+       "Hausbruch                                                        40.0                          \n",
+       "Neugraben-Fischbek                                               40.0                          \n",
+       "Francop                                                          43.0                          \n",
+       "Neuenfelde                                                       44.2                          \n",
+       "Cranz                                                            54.8                          \n",
+       "\n",
+       "                    Gesamtbetrag der Einkünfte - [Steuerpflichtig]  \\\n",
+       "stadtteil                                                            \n",
+       "Hamburg-Altstadt                                            1952.0   \n",
+       "HafenCity                                                   1255.0   \n",
+       "Neustadt                                                    7015.0   \n",
+       "St. Pauli                                                  11066.0   \n",
+       "St. Georg                                                   5683.0   \n",
+       "...                                                            ...   \n",
+       "Hausbruch                                                   7349.0   \n",
+       "Neugraben-Fischbek                                         12290.0   \n",
+       "Francop                                                      356.0   \n",
+       "Neuenfelde                                                  1909.0   \n",
+       "Cranz                                                        378.0   \n",
+       "\n",
+       "                    Gesamtbetrag der Einkünfte - [1000€]  \\\n",
+       "stadtteil                                                  \n",
+       "Hamburg-Altstadt                                 61168.0   \n",
+       "HafenCity                                       116973.0   \n",
+       "Neustadt                                        242164.0   \n",
+       "St. Pauli                                       309596.0   \n",
+       "St. Georg                                       250742.0   \n",
+       "...                                                  ...   \n",
+       "Hausbruch                                       227990.0   \n",
+       "Neugraben-Fischbek                              382231.0   \n",
+       "Francop                                          12738.0   \n",
+       "Neuenfelde                                       62765.0   \n",
+       "Cranz                                            11845.0   \n",
+       "\n",
+       "                    Festgesetzte Einkommenssteuer/ Jahreslohnsteuer - [1000€]  \\\n",
+       "stadtteil                                                                       \n",
+       "Hamburg-Altstadt                                              11577.0           \n",
+       "HafenCity                                                     34051.0           \n",
+       "Neustadt                                                      46861.0           \n",
+       "St. Pauli                                                     55589.0           \n",
+       "St. Georg                                                     58371.0           \n",
+       "...                                                               ...           \n",
+       "Hausbruch                                                     36179.0           \n",
+       "Neugraben-Fischbek                                            60244.0           \n",
+       "Francop                                                        2083.0           \n",
+       "Neuenfelde                                                    10422.0           \n",
+       "Cranz                                                          2018.0           \n",
+       "\n",
+       "                    Gesamtbetrag Einkünfte Mittelwert - [€]  \\\n",
+       "stadtteil                                                     \n",
+       "Hamburg-Altstadt                                    31336.0   \n",
+       "HafenCity                                           93206.0   \n",
+       "Neustadt                                            34521.0   \n",
+       "St. Pauli                                           27977.0   \n",
+       "St. Georg                                           44121.0   \n",
+       "...                                                     ...   \n",
+       "Hausbruch                                           31023.0   \n",
+       "Neugraben-Fischbek                                  31101.0   \n",
+       "Francop                                             35782.0   \n",
+       "Neuenfelde                                          32879.0   \n",
+       "Cranz                                               31335.0   \n",
+       "\n",
+       "                    Gesamtbetrag Einkünfte Median - [€]  market_count  \\\n",
+       "stadtteil                                                               \n",
+       "Hamburg-Altstadt                                10811.0             2   \n",
+       "HafenCity                                       57913.0             1   \n",
+       "Neustadt                                        24715.0             1   \n",
+       "St. Pauli                                       19399.0             1   \n",
+       "St. Georg                                       27161.0             2   \n",
+       "...                                                 ...           ...   \n",
+       "Hausbruch                                       21355.0             0   \n",
+       "Neugraben-Fischbek                              22492.0             1   \n",
+       "Francop                                         26568.0             0   \n",
+       "Neuenfelde                                      22909.0             0   \n",
+       "Cranz                                           22852.0             0   \n",
+       "\n",
+       "                    farms_count  greencrocers_count  supermarkets_count  \\\n",
+       "stadtteil                                                                 \n",
+       "Hamburg-Altstadt              0                   1                   4   \n",
+       "HafenCity                     0                   0                   6   \n",
+       "Neustadt                      0                   2                   6   \n",
+       "St. Pauli                     0                   1                  10   \n",
+       "St. Georg                     0                   0                  11   \n",
+       "...                         ...                 ...                 ...   \n",
+       "Hausbruch                     0                   1                   1   \n",
+       "Neugraben-Fischbek            0                   0                   4   \n",
+       "Francop                       0                   0                   0   \n",
+       "Neuenfelde                    0                   1                   0   \n",
+       "Cranz                         0                   0                   0   \n",
+       "\n",
+       "                    biosupermarkets_count  all_restaurants_count  \\\n",
+       "stadtteil                                                          \n",
+       "Hamburg-Altstadt                        0                    129   \n",
+       "HafenCity                               1                     56   \n",
+       "Neustadt                                1                    140   \n",
+       "St. Pauli                               0                    109   \n",
+       "St. Georg                               0                     80   \n",
+       "...                                   ...                    ...   \n",
+       "Hausbruch                               0                      1   \n",
+       "Neugraben-Fischbek                      1                      8   \n",
+       "Francop                                 0                      0   \n",
+       "Neuenfelde                              0                      2   \n",
+       "Cranz                                   0                      2   \n",
+       "\n",
+       "                    organic_restaurants_count  vegan_restaurants_count  \\\n",
+       "stadtteil                                                                \n",
+       "Hamburg-Altstadt                            0                        4   \n",
+       "HafenCity                                   3                        1   \n",
+       "Neustadt                                    4                        4   \n",
+       "St. Pauli                                   1                        2   \n",
+       "St. Georg                                   2                        4   \n",
+       "...                                       ...                      ...   \n",
+       "Hausbruch                                   0                        0   \n",
+       "Neugraben-Fischbek                          0                        0   \n",
+       "Francop                                     0                        0   \n",
+       "Neuenfelde                                  0                        0   \n",
+       "Cranz                                       0                        0   \n",
+       "\n",
+       "                    art_score  distance_rathaus  \n",
+       "stadtteil                                        \n",
+       "Hamburg-Altstadt     2.565476          0.000000  \n",
+       "HafenCity            1.952381          0.005758  \n",
+       "Neustadt             2.702381          0.001174  \n",
+       "St. Pauli            1.851190          0.009631  \n",
+       "St. Georg            2.523810          0.008493  \n",
+       "...                       ...               ...  \n",
+       "Hausbruch            0.422619          0.104116  \n",
+       "Neugraben-Fischbek   1.857143          0.130211  \n",
+       "Francop              0.000000          0.114626  \n",
+       "Neuenfelde           0.375000          0.162791  \n",
+       "Cranz                0.000000          0.200686  \n",
+       "\n",
+       "[99 rows x 39 columns]"
+      ]
+     },
+     "execution_count": 53,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# ---------------- Load the final district-level dataset ----------------\n",
+    "# final_data.csv presumably already contains the merged dcount_df / selected_df data - TODO confirm\n",
+    "\n",
+    "# Relative path, resolved against the kernel's working directory.\n",
+    "# NOTE(review): confirm which directory the notebook is expected to be started from.\n",
+    "path1 = \"Data/final_data.csv\"\n",
+    "\n",
+    "# Use the first CSV column as the row index and name that index \"stadtteil\".\n",
+    "# This avoids writing into the (immutable) column Index and avoids inplace mutation,\n",
+    "# so re-running the cell always yields the same frame.\n",
+    "final_df = pd.read_csv(path1, index_col=0).rename_axis(\"stadtteil\")\n",
+    "final_df"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Data pre-processing (replacing NaNs with column averages)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 54,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "total number of missing values in no_art_merged_df: 0\n"
+     ]
+    }
+   ],
+   "source": [
+    "from sklearn.ensemble import RandomForestClassifier\n",
+    "from sklearn.model_selection import train_test_split\n",
+    "\n",
+    "#--------------------------- Pre-processing: independent variables and dependent variable ('weekend markets') ----------------#\n",
+    "\n",
+    "# Per-column averages of final_df (pandas skips NaNs by default when averaging)\n",
+    "column_means = final_df.mean(axis=0)\n",
+    "\n",
+    "# Replace every NaN with the average of its own column; final_df itself is left untouched\n",
+    "merged_df = final_df.fillna(value=column_means)\n",
+    "\n",
+    "# Sanity check: count the NaNs still present in merged_df after the imputation.\n",
+    "# The printed label says \"no_art_merged_df\", but the frame checked here is merged_df.\n",
+    "nan_count_per_column = merged_df.isna().sum()\n",
+    "total_missing_values = nan_count_per_column.sum()\n",
+    "\n",
+    "print(\"total number of missing values in no_art_merged_df:\", total_missing_values)\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Select 11 variables for predicting weekend markets (10 predictors plus the target `market_count`)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 55,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>greencrocers_count</th>\n",
+       "      <th>supermarkets_count</th>\n",
+       "      <th>biosupermarkets_count</th>\n",
+       "      <th>all_restaurants_count</th>\n",
+       "      <th>organic_restaurants_count</th>\n",
+       "      <th>Einwohner: innen je km²</th>\n",
+       "      <th>distance_rathaus</th>\n",
+       "      <th>Gesamtbetrag Einkünfte Median - [€]</th>\n",
+       "      <th>Anteil der Sozial-wohnungen an allen Wohnungen</th>\n",
+       "      <th>vegan_restaurants_count</th>\n",
+       "      <th>market_count</th>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>stadtteil</th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>Hamburg-Altstadt</th>\n",
+       "      <td>1</td>\n",
+       "      <td>4</td>\n",
+       "      <td>0</td>\n",
+       "      <td>129</td>\n",
+       "      <td>0</td>\n",
+       "      <td>2447.0</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>10811.0</td>\n",
+       "      <td>11.8</td>\n",
+       "      <td>4</td>\n",
+       "      <td>2</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>HafenCity</th>\n",
+       "      <td>0</td>\n",
+       "      <td>6</td>\n",
+       "      <td>1</td>\n",
+       "      <td>56</td>\n",
+       "      <td>3</td>\n",
+       "      <td>2865.0</td>\n",
+       "      <td>0.005758</td>\n",
+       "      <td>57913.0</td>\n",
+       "      <td>27.6</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>Neustadt</th>\n",
+       "      <td>2</td>\n",
+       "      <td>6</td>\n",
+       "      <td>1</td>\n",
+       "      <td>140</td>\n",
+       "      <td>4</td>\n",
+       "      <td>5592.0</td>\n",
+       "      <td>0.001174</td>\n",
+       "      <td>24715.0</td>\n",
+       "      <td>12.9</td>\n",
+       "      <td>4</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>St. Pauli</th>\n",
+       "      <td>1</td>\n",
+       "      <td>10</td>\n",
+       "      <td>0</td>\n",
+       "      <td>109</td>\n",
+       "      <td>1</td>\n",
+       "      <td>9836.0</td>\n",
+       "      <td>0.009631</td>\n",
+       "      <td>19399.0</td>\n",
+       "      <td>12.0</td>\n",
+       "      <td>2</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>St. Georg</th>\n",
+       "      <td>0</td>\n",
+       "      <td>11</td>\n",
+       "      <td>0</td>\n",
+       "      <td>80</td>\n",
+       "      <td>2</td>\n",
+       "      <td>6758.0</td>\n",
+       "      <td>0.008493</td>\n",
+       "      <td>27161.0</td>\n",
+       "      <td>11.9</td>\n",
+       "      <td>4</td>\n",
+       "      <td>2</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>...</th>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>Hausbruch</th>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1715.0</td>\n",
+       "      <td>0.104116</td>\n",
+       "      <td>21355.0</td>\n",
+       "      <td>14.1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>Neugraben-Fischbek</th>\n",
+       "      <td>0</td>\n",
+       "      <td>4</td>\n",
+       "      <td>1</td>\n",
+       "      <td>8</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1508.0</td>\n",
+       "      <td>0.130211</td>\n",
+       "      <td>22492.0</td>\n",
+       "      <td>7.3</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>Francop</th>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>84.0</td>\n",
+       "      <td>0.114626</td>\n",
+       "      <td>26568.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>Neuenfelde</th>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>2</td>\n",
+       "      <td>0</td>\n",
+       "      <td>335.0</td>\n",
+       "      <td>0.162791</td>\n",
+       "      <td>22909.0</td>\n",
+       "      <td>15.2</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>Cranz</th>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>2</td>\n",
+       "      <td>0</td>\n",
+       "      <td>608.0</td>\n",
+       "      <td>0.200686</td>\n",
+       "      <td>22852.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>99 rows × 11 columns</p>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "                    greencrocers_count  supermarkets_count  \\\n",
+       "stadtteil                                                    \n",
+       "Hamburg-Altstadt                     1                   4   \n",
+       "HafenCity                            0                   6   \n",
+       "Neustadt                             2                   6   \n",
+       "St. Pauli                            1                  10   \n",
+       "St. Georg                            0                  11   \n",
+       "...                                ...                 ...   \n",
+       "Hausbruch                            1                   1   \n",
+       "Neugraben-Fischbek                   0                   4   \n",
+       "Francop                              0                   0   \n",
+       "Neuenfelde                           1                   0   \n",
+       "Cranz                                0                   0   \n",
+       "\n",
+       "                    biosupermarkets_count  all_restaurants_count  \\\n",
+       "stadtteil                                                          \n",
+       "Hamburg-Altstadt                        0                    129   \n",
+       "HafenCity                               1                     56   \n",
+       "Neustadt                                1                    140   \n",
+       "St. Pauli                               0                    109   \n",
+       "St. Georg                               0                     80   \n",
+       "...                                   ...                    ...   \n",
+       "Hausbruch                               0                      1   \n",
+       "Neugraben-Fischbek                      1                      8   \n",
+       "Francop                                 0                      0   \n",
+       "Neuenfelde                              0                      2   \n",
+       "Cranz                                   0                      2   \n",
+       "\n",
+       "                    organic_restaurants_count  Einwohner: innen je km²  \\\n",
+       "stadtteil                                                                \n",
+       "Hamburg-Altstadt                            0                   2447.0   \n",
+       "HafenCity                                   3                   2865.0   \n",
+       "Neustadt                                    4                   5592.0   \n",
+       "St. Pauli                                   1                   9836.0   \n",
+       "St. Georg                                   2                   6758.0   \n",
+       "...                                       ...                      ...   \n",
+       "Hausbruch                                   0                   1715.0   \n",
+       "Neugraben-Fischbek                          0                   1508.0   \n",
+       "Francop                                     0                     84.0   \n",
+       "Neuenfelde                                  0                    335.0   \n",
+       "Cranz                                       0                    608.0   \n",
+       "\n",
+       "                    distance_rathaus  Gesamtbetrag Einkünfte Median - [€]  \\\n",
+       "stadtteil                                                                   \n",
+       "Hamburg-Altstadt            0.000000                              10811.0   \n",
+       "HafenCity                   0.005758                              57913.0   \n",
+       "Neustadt                    0.001174                              24715.0   \n",
+       "St. Pauli                   0.009631                              19399.0   \n",
+       "St. Georg                   0.008493                              27161.0   \n",
+       "...                              ...                                  ...   \n",
+       "Hausbruch                   0.104116                              21355.0   \n",
+       "Neugraben-Fischbek          0.130211                              22492.0   \n",
+       "Francop                     0.114626                              26568.0   \n",
+       "Neuenfelde                  0.162791                              22909.0   \n",
+       "Cranz                       0.200686                              22852.0   \n",
+       "\n",
+       "                    Anteil der Sozial-wohnungen an allen Wohnungen  \\\n",
+       "stadtteil                                                            \n",
+       "Hamburg-Altstadt                                              11.8   \n",
+       "HafenCity                                                     27.6   \n",
+       "Neustadt                                                      12.9   \n",
+       "St. Pauli                                                     12.0   \n",
+       "St. Georg                                                     11.9   \n",
+       "...                                                            ...   \n",
+       "Hausbruch                                                     14.1   \n",
+       "Neugraben-Fischbek                                             7.3   \n",
+       "Francop                                                        0.0   \n",
+       "Neuenfelde                                                    15.2   \n",
+       "Cranz                                                          0.0   \n",
+       "\n",
+       "                    vegan_restaurants_count  market_count  \n",
+       "stadtteil                                                  \n",
+       "Hamburg-Altstadt                          4             2  \n",
+       "HafenCity                                 1             1  \n",
+       "Neustadt                                  4             1  \n",
+       "St. Pauli                                 2             1  \n",
+       "St. Georg                                 4             2  \n",
+       "...                                     ...           ...  \n",
+       "Hausbruch                                 0             0  \n",
+       "Neugraben-Fischbek                        0             1  \n",
+       "Francop                                   0             0  \n",
+       "Neuenfelde                                0             0  \n",
+       "Cranz                                     0             0  \n",
+       "\n",
+       "[99 rows x 11 columns]"
+      ]
+     },
+     "execution_count": 55,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# ---- The 10 predictors (plus the target) taken from the discussion, used for predicting weekend markets ---- #\n",
+    "\n",
+    "# Column names for the weekend-market model: independent variables first,\n",
+    "# the dependent variable 'market_count' last.\n",
+    "var_markets = [\n",
+    "    'greencrocers_count',\n",
+    "    'supermarkets_count',\n",
+    "    'biosupermarkets_count',\n",
+    "    'all_restaurants_count',\n",
+    "    'organic_restaurants_count',\n",
+    "    'Einwohner: innen je km²',\n",
+    "    'distance_rathaus',\n",
+    "    'Gesamtbetrag Einkünfte Median - [€]',\n",
+    "    'Anteil der Sozial-wohnungen an allen Wohnungen',\n",
+    "    'vegan_restaurants_count',\n",
+    "    'market_count',\n",
+    "]\n",
+    "\n",
+    "# Keep only these columns of the merged frame, then display the result.\n",
+    "merged_df = merged_df.loc[:, var_markets]\n",
+    "merged_df"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Set the number of repeated random train/test iterations and define the dependent variable (weekend markets)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 56,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Number of random train/test splits each selection method is repeated over.\n",
+    "num_repeats = 100\n",
+    "\n",
+    "# Dependent variable: the market count column.\n",
+    "y = merged_df[\"market_count\"]\n",
+    "# Independent variables: every remaining column of merged_df.\n",
+    "X = merged_df.drop(columns=\"market_count\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Random Forest Method (Variable Selection 1)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 57,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from sklearn.metrics import accuracy_score\n",
+    "\n",
+    "# Per-iteration records: one test accuracy and one row of feature importances per repeat.\n",
+    "rf_accuracies_list = []\n",
+    "rf_iteration_importances_df = pd.DataFrame(columns=X.columns)\n",
+    "\n",
+    "for i in range(num_repeats):\n",
+    "    # 80:20 train/test split; the iteration index doubles as the random seed.\n",
+    "    X_train, X_test, y_train, y_test = train_test_split(\n",
+    "        X, y, test_size=0.2, random_state=i\n",
+    "    )\n",
+    "\n",
+    "    # Fit a 100-tree random forest (seeded with i) on the training portion.\n",
+    "    rf_model = RandomForestClassifier(n_estimators=100, random_state=i)\n",
+    "    rf_model.fit(X_train, y_train)\n",
+    "\n",
+    "    # Score the fitted forest (all features) on the held-out portion.\n",
+    "    rf_y_pred = rf_model.predict(X_test)\n",
+    "    rf_accuracy = accuracy_score(y_test, rf_y_pred)\n",
+    "    rf_accuracies_list.append(rf_accuracy)\n",
+    "\n",
+    "    # Store this iteration's importances as row i (values follow the order of X.columns).\n",
+    "    rf_importances = rf_model.feature_importances_\n",
+    "    rf_iteration_importances_df.loc[i] = rf_importances"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 58,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "RF model features and avg_importances:\n",
+      "all_restaurants_count                             0.219926\n",
+      "supermarkets_count                                0.186518\n",
+      "Anteil der Sozial-wohnungen an allen Wohnungen    0.119212\n",
+      "Einwohner: innen je km²                           0.118131\n",
+      "Gesamtbetrag Einkünfte Median - [€]               0.114430\n",
+      "distance_rathaus                                  0.108481\n",
+      "greencrocers_count                                0.065882\n",
+      "biosupermarkets_count                             0.033396\n",
+      "vegan_restaurants_count                           0.024088\n",
+      "organic_restaurants_count                         0.009936\n",
+      "dtype: float64\n",
+      "Random Forest avg_accuracy: 0.6145\n"
+     ]
+    }
+   ],
+   "source": [
+    "# ---- Average importances and accuracy over all RF iterations ---- #\n",
+    "\n",
+    "# Column-wise mean gives one average importance per feature; rank from highest to lowest.\n",
+    "rf_feature_mean_importances = rf_iteration_importances_df.mean(axis=0)\n",
+    "rf_feature_mean_importances_sorted = rf_feature_mean_importances.sort_values(ascending=False)\n",
+    "\n",
+    "# Mean test accuracy across the repeated splits.\n",
+    "rf_avg_accuracy = np.mean(rf_accuracies_list)\n",
+    "\n",
+    "print('RF model features and avg_importances:')\n",
+    "print(rf_feature_mean_importances_sorted)\n",
+    "print(\"Random Forest avg_accuracy:\", rf_avg_accuracy)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Gradient Boosting Tree (Variable Selection 2)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 59,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from sklearn.ensemble import GradientBoostingClassifier\n",
+    "from sklearn.feature_selection import SelectFromModel\n",
+    "\n",
+    "# Per-iteration records: one test accuracy and one row of feature importances per repeat.\n",
+    "gbt_accuracies_list = []\n",
+    "gbt_iteration_importances_df = pd.DataFrame(columns=X.columns)\n",
+    "\n",
+    "for i in range(num_repeats):\n",
+    "    # 80:20 train/test split; the iteration index doubles as the random seed.\n",
+    "    X_train, X_test, y_train, y_test = train_test_split(\n",
+    "        X, y, test_size=0.2, random_state=i\n",
+    "    )\n",
+    "\n",
+    "    # Fit a 100-tree gradient boosting classifier (seeded with i, not a fixed seed).\n",
+    "    gbt_model = GradientBoostingClassifier(n_estimators=100, random_state=i)\n",
+    "    gbt_model.fit(X_train, y_train)\n",
+    "\n",
+    "    # Score the fitted model (all features) on the held-out portion.\n",
+    "    gbt_y_pred = gbt_model.predict(X_test)\n",
+    "    gbt_accuracy = accuracy_score(y_test, gbt_y_pred)\n",
+    "    gbt_accuracies_list.append(gbt_accuracy)\n",
+    "\n",
+    "    # Store this iteration's importances as row i (values follow the order of X.columns).\n",
+    "    gbt_importances = gbt_model.feature_importances_\n",
+    "    gbt_iteration_importances_df.loc[i] = gbt_importances"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 60,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "GBT model features and avg_importances:\n",
+      "all_restaurants_count                             0.291120\n",
+      "supermarkets_count                                0.221202\n",
+      "Gesamtbetrag Einkünfte Median - [€]               0.122629\n",
+      "Anteil der Sozial-wohnungen an allen Wohnungen    0.111979\n",
+      "distance_rathaus                                  0.104781\n",
+      "Einwohner: innen je km²                           0.070611\n",
+      "greencrocers_count                                0.043105\n",
+      "biosupermarkets_count                             0.019531\n",
+      "vegan_restaurants_count                           0.007623\n",
+      "organic_restaurants_count                         0.007419\n",
+      "dtype: float64\n",
+      "GBT avg_accuracy: 0.6195\n"
+     ]
+    }
+   ],
+   "source": [
+    "# ---- Average importances and accuracy over all GBT iterations ---- #\n",
+    "\n",
+    "# Column-wise mean gives one average importance per feature; rank from highest to lowest.\n",
+    "gbt_feature_mean_importances = gbt_iteration_importances_df.mean(axis=0)\n",
+    "gbt_feature_mean_importances_sorted = gbt_feature_mean_importances.sort_values(ascending=False)\n",
+    "\n",
+    "# Mean test accuracy across the repeated splits.\n",
+    "gbt_avg_accuracy = np.mean(gbt_accuracies_list)\n",
+    "\n",
+    "print('GBT model features and avg_importances:')\n",
+    "print(gbt_feature_mean_importances_sorted)\n",
+    "print(\"GBT avg_accuracy:\", gbt_avg_accuracy)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Recursive Feature Elimination with Random Forest (Variable Selection 3)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 61,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from sklearn.feature_selection import RFE\n",
+    "\n",
+    "# Per-iteration records: one test accuracy and one row of feature importances per repeat.\n",
+    "rfe1_accuracies_list = []\n",
+    "rfe1_iteration_importances_df = pd.DataFrame(columns=X.columns)\n",
+    "\n",
+    "for i in range(num_repeats):\n",
+    "    # 80:20 train/test split; the iteration index doubles as the random seed.\n",
+    "    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=i)\n",
+    "\n",
+    "    # Random forest (100 trees, seeded with i) used as the estimator inside RFE.\n",
+    "    rf2_model = RandomForestClassifier(n_estimators=100, random_state=i)\n",
+    "\n",
+    "    # Recursive Feature Elimination, removing one feature per step.\n",
+    "    # NOTE(review): X has 10 columns, so n_features_to_select=10 keeps every feature and RFE\n",
+    "    # eliminates nothing - the results equal the plain RF run. Lower the value to really select.\n",
+    "    rfe1 = RFE(estimator=rf2_model, n_features_to_select=10, step=1)\n",
+    "    rfe1.fit(X_train, y_train)\n",
+    "\n",
+    "    # Boolean mask over X.columns marking the features RFE kept, and their names.\n",
+    "    rfe1_selected_indices = rfe1.support_\n",
+    "    rfe1_selected_features = X_train.columns[rfe1_selected_indices]\n",
+    "\n",
+    "    # Retrain the random forest on the selected features only.\n",
+    "    rfe1_X_selected_train = X_train.iloc[:, rfe1_selected_indices]\n",
+    "    rfe1_X_selected_test = X_test.iloc[:, rfe1_selected_indices]\n",
+    "    rf2_model.fit(rfe1_X_selected_train, y_train)\n",
+    "\n",
+    "    # Importances of the retrained model, in the order of the selected features.\n",
+    "    rfe1_selected_importances = rf2_model.feature_importances_\n",
+    "\n",
+    "    # Record row i aligned by feature name: a purely positional assignment would misalign\n",
+    "    # (or fail) once RFE drops a feature. Eliminated features are recorded as 0.0.\n",
+    "    rfe1_iteration_importances_df.loc[i] = (\n",
+    "        pd.Series(rfe1_selected_importances, index=rfe1_selected_features)\n",
+    "        .reindex(X.columns, fill_value=0.0)\n",
+    "        .to_numpy()\n",
+    "    )\n",
+    "\n",
+    "    # Test accuracy of the retrained RFE + RF model.\n",
+    "    rfe1_y_pred = rf2_model.predict(rfe1_X_selected_test)\n",
+    "    rfe1_accuracy = accuracy_score(y_test, rfe1_y_pred)\n",
+    "    rfe1_accuracies_list.append(rfe1_accuracy)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 62,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "RFE_RF model features and avg_importances:\n",
+      "all_restaurants_count                             0.219926\n",
+      "supermarkets_count                                0.186518\n",
+      "Anteil der Sozial-wohnungen an allen Wohnungen    0.119212\n",
+      "Einwohner: innen je km²                           0.118131\n",
+      "Gesamtbetrag Einkünfte Median - [€]               0.114430\n",
+      "distance_rathaus                                  0.108481\n",
+      "greencrocers_count                                0.065882\n",
+      "biosupermarkets_count                             0.033396\n",
+      "vegan_restaurants_count                           0.024088\n",
+      "organic_restaurants_count                         0.009936\n",
+      "dtype: float64\n",
+      "RFE_RF avg_accuracy: 0.6145\n"
+     ]
+    }
+   ],
+   "source": [
+    "# ---- Average importances and accuracy over all RFE + RF iterations ---- #\n",
+    "\n",
+    "# Column-wise mean gives one average importance per feature; rank from highest to lowest.\n",
+    "rfe1_feature_mean_importances = rfe1_iteration_importances_df.mean(axis=0)\n",
+    "rfe1_feature_mean_importances_sorted = rfe1_feature_mean_importances.sort_values(ascending=False)\n",
+    "\n",
+    "# Mean test accuracy across the repeated splits.\n",
+    "rfe1_avg_accuracy = np.mean(rfe1_accuracies_list)\n",
+    "\n",
+    "print('RFE_RF model features and avg_importances:')\n",
+    "print(rfe1_feature_mean_importances_sorted)\n",
+    "print(\"RFE_RF avg_accuracy:\", rfe1_avg_accuracy)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Recursive Feature Elimination with Gradient Boosting Tree (Variable Selection 4)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 63,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Per-iteration records: one test accuracy and one row of feature importances per repeat.\n",
+    "rfe2_accuracies_list = []\n",
+    "rfe2_iteration_importances_df = pd.DataFrame(columns=X.columns)\n",
+    "\n",
+    "for i in range(num_repeats):\n",
+    "    # 80:20 train/test split; the iteration index doubles as the random seed.\n",
+    "    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=i)\n",
+    "\n",
+    "    # Gradient boosting classifier (100 trees, seeded with i) used as the estimator inside RFE.\n",
+    "    gbt2_model = GradientBoostingClassifier(n_estimators=100, random_state=i)\n",
+    "\n",
+    "    # Recursive Feature Elimination, removing one feature per step.\n",
+    "    # NOTE(review): X has 10 columns, so n_features_to_select=10 keeps every feature and RFE\n",
+    "    # eliminates nothing - the results equal the plain GBT run. Lower the value to really select.\n",
+    "    rfe2 = RFE(estimator=gbt2_model, n_features_to_select=10, step=1)\n",
+    "    rfe2.fit(X_train, y_train)\n",
+    "\n",
+    "    # Names of the features RFE kept (rfe2.support_ is a boolean mask over X.columns).\n",
+    "    rfe2_selected_features = X_train.columns[rfe2.support_]\n",
+    "\n",
+    "    # Retrain the gradient boosting model on the selected features only.\n",
+    "    rfe2_X_selected_train = X_train.iloc[:, rfe2.support_]\n",
+    "    rfe2_X_selected_test = X_test.iloc[:, rfe2.support_]\n",
+    "    gbt2_model.fit(rfe2_X_selected_train, y_train)\n",
+    "\n",
+    "    # Importances of the retrained model, in the order of the selected features.\n",
+    "    rfe2_selected_importances = gbt2_model.feature_importances_\n",
+    "\n",
+    "    # Record row i aligned by feature name: a purely positional assignment would misalign\n",
+    "    # (or fail) once RFE drops a feature. Eliminated features are recorded as 0.0.\n",
+    "    rfe2_iteration_importances_df.loc[i] = (\n",
+    "        pd.Series(rfe2_selected_importances, index=rfe2_selected_features)\n",
+    "        .reindex(X.columns, fill_value=0.0)\n",
+    "        .to_numpy()\n",
+    "    )\n",
+    "\n",
+    "    # Test accuracy of the retrained RFE + GBT model.\n",
+    "    rfe2_y_pred = gbt2_model.predict(rfe2_X_selected_test)\n",
+    "    rfe2_accuracy = accuracy_score(y_test, rfe2_y_pred)\n",
+    "    rfe2_accuracies_list.append(rfe2_accuracy)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 64,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "RFE_GBT model features and avg_importances:\n",
+      "all_restaurants_count                             0.291120\n",
+      "supermarkets_count                                0.221202\n",
+      "Gesamtbetrag Einkünfte Median - [€]               0.122629\n",
+      "Anteil der Sozial-wohnungen an allen Wohnungen    0.111979\n",
+      "distance_rathaus                                  0.104781\n",
+      "Einwohner: innen je km²                           0.070611\n",
+      "greencrocers_count                                0.043105\n",
+      "biosupermarkets_count                             0.019531\n",
+      "vegan_restaurants_count                           0.007623\n",
+      "organic_restaurants_count                         0.007419\n",
+      "dtype: float64\n",
+      "RFE_GBT avg_accuracy: 0.6195\n"
+     ]
+    }
+   ],
+   "source": [
+    "# ---- Average importances and accuracy over all RFE + GBT iterations ---- #\n",
+    "\n",
+    "# Column-wise mean gives one average importance per feature; rank from highest to lowest.\n",
+    "rfe2_feature_mean_importances = rfe2_iteration_importances_df.mean(axis=0)\n",
+    "rfe2_feature_mean_importances_sorted = rfe2_feature_mean_importances.sort_values(ascending=False)\n",
+    "\n",
+    "# Mean test accuracy of the RFE + GBT model across the repeated splits.\n",
+    "rfe2_avg_accuracy = np.mean(rfe2_accuracies_list)\n",
+    "\n",
+    "print('RFE_GBT model features and avg_importances:')\n",
+    "print(rfe2_feature_mean_importances_sorted)\n",
+    "print(\"RFE_GBT avg_accuracy:\", rfe2_avg_accuracy)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Comparing selected variables from all 4 methods"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 65,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>Feature</th>\n",
+       "      <th>Mean_Importance</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>all_restaurants_count</td>\n",
+       "      <td>0.291120</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>supermarkets_count</td>\n",
+       "      <td>0.221202</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>Gesamtbetrag Einkünfte Median - [€]</td>\n",
+       "      <td>0.122629</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>Anteil der Sozial-wohnungen an allen Wohnungen</td>\n",
+       "      <td>0.111979</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>distance_rathaus</td>\n",
+       "      <td>0.104781</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>5</th>\n",
+       "      <td>Einwohner: innen je km²</td>\n",
+       "      <td>0.070611</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>6</th>\n",
+       "      <td>greencrocers_count</td>\n",
+       "      <td>0.043105</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>7</th>\n",
+       "      <td>biosupermarkets_count</td>\n",
+       "      <td>0.019531</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>8</th>\n",
+       "      <td>vegan_restaurants_count</td>\n",
+       "      <td>0.007623</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>9</th>\n",
+       "      <td>organic_restaurants_count</td>\n",
+       "      <td>0.007419</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "                                          Feature  Mean_Importance\n",
+       "0                           all_restaurants_count         0.291120\n",
+       "1                              supermarkets_count         0.221202\n",
+       "2             Gesamtbetrag Einkünfte Median - [€]         0.122629\n",
+       "3  Anteil der Sozial-wohnungen an allen Wohnungen         0.111979\n",
+       "4                                distance_rathaus         0.104781\n",
+       "5                         Einwohner: innen je km²         0.070611\n",
+       "6                              greencrocers_count         0.043105\n",
+       "7                           biosupermarkets_count         0.019531\n",
+       "8                         vegan_restaurants_count         0.007623\n",
+       "9                       organic_restaurants_count         0.007419"
+      ]
+     },
+     "execution_count": 65,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# ---- Turn each method's ranked mean importances into a two-column table ---- #\n",
+    "\n",
+    "def _importance_table(sorted_importances):\n",
+    "    \"\"\"Return a DataFrame with the columns 'Feature' and 'Mean_Importance'.\n",
+    "\n",
+    "    sorted_importances: Series of mean importances indexed by feature name.\n",
+    "    The row order of the input is kept and the result gets a fresh 0..n-1 index.\n",
+    "    Naming the index before resetting it avoids relying on the default 'index' label.\n",
+    "    \"\"\"\n",
+    "    return (\n",
+    "        sorted_importances\n",
+    "        .rename_axis('Feature')\n",
+    "        .reset_index(name='Mean_Importance')\n",
+    "    )\n",
+    "\n",
+    "\n",
+    "rf_feature_mean_importances_df = _importance_table(rf_feature_mean_importances_sorted)\n",
+    "gbt_feature_mean_importances_df = _importance_table(gbt_feature_mean_importances_sorted)\n",
+    "rfe1_feature_mean_importances_df = _importance_table(rfe1_feature_mean_importances_sorted)\n",
+    "rfe2_feature_mean_importances_df = _importance_table(rfe2_feature_mean_importances_sorted)\n",
+    "\n",
+    "# Only the last table (RFE with GBT) is displayed as the cell output.\n",
+    "rfe2_feature_mean_importances_df\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 66,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>RF_Feature</th>\n",
+       "      <th>RF_avg_Importance</th>\n",
+       "      <th>GBT_Feature</th>\n",
+       "      <th>GBT_avg_Importance</th>\n",
+       "      <th>RFE_RF_Feature</th>\n",
+       "      <th>RFE_RF_avg_Importance</th>\n",
+       "      <th>RFE_GBT_Feature</th>\n",
+       "      <th>RFE_GBT_avg_Importance</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>all_restaurants_count</td>\n",
+       "      <td>0.219926</td>\n",
+       "      <td>all_restaurants_count</td>\n",
+       "      <td>0.291120</td>\n",
+       "      <td>all_restaurants_count</td>\n",
+       "      <td>0.219926</td>\n",
+       "      <td>all_restaurants_count</td>\n",
+       "      <td>0.291120</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>supermarkets_count</td>\n",
+       "      <td>0.186518</td>\n",
+       "      <td>supermarkets_count</td>\n",
+       "      <td>0.221202</td>\n",
+       "      <td>supermarkets_count</td>\n",
+       "      <td>0.186518</td>\n",
+       "      <td>supermarkets_count</td>\n",
+       "      <td>0.221202</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>Anteil der Sozial-wohnungen an allen Wohnungen</td>\n",
+       "      <td>0.119212</td>\n",
+       "      <td>Gesamtbetrag Einkünfte Median - [€]</td>\n",
+       "      <td>0.122629</td>\n",
+       "      <td>Anteil der Sozial-wohnungen an allen Wohnungen</td>\n",
+       "      <td>0.119212</td>\n",
+       "      <td>Gesamtbetrag Einkünfte Median - [€]</td>\n",
+       "      <td>0.122629</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>Einwohner: innen je km²</td>\n",
+       "      <td>0.118131</td>\n",
+       "      <td>Anteil der Sozial-wohnungen an allen Wohnungen</td>\n",
+       "      <td>0.111979</td>\n",
+       "      <td>Einwohner: innen je km²</td>\n",
+       "      <td>0.118131</td>\n",
+       "      <td>Anteil der Sozial-wohnungen an allen Wohnungen</td>\n",
+       "      <td>0.111979</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>Gesamtbetrag Einkünfte Median - [€]</td>\n",
+       "      <td>0.114430</td>\n",
+       "      <td>distance_rathaus</td>\n",
+       "      <td>0.104781</td>\n",
+       "      <td>Gesamtbetrag Einkünfte Median - [€]</td>\n",
+       "      <td>0.114430</td>\n",
+       "      <td>distance_rathaus</td>\n",
+       "      <td>0.104781</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>5</th>\n",
+       "      <td>distance_rathaus</td>\n",
+       "      <td>0.108481</td>\n",
+       "      <td>Einwohner: innen je km²</td>\n",
+       "      <td>0.070611</td>\n",
+       "      <td>distance_rathaus</td>\n",
+       "      <td>0.108481</td>\n",
+       "      <td>Einwohner: innen je km²</td>\n",
+       "      <td>0.070611</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>6</th>\n",
+       "      <td>greencrocers_count</td>\n",
+       "      <td>0.065882</td>\n",
+       "      <td>greencrocers_count</td>\n",
+       "      <td>0.043105</td>\n",
+       "      <td>greencrocers_count</td>\n",
+       "      <td>0.065882</td>\n",
+       "      <td>greencrocers_count</td>\n",
+       "      <td>0.043105</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>7</th>\n",
+       "      <td>biosupermarkets_count</td>\n",
+       "      <td>0.033396</td>\n",
+       "      <td>biosupermarkets_count</td>\n",
+       "      <td>0.019531</td>\n",
+       "      <td>biosupermarkets_count</td>\n",
+       "      <td>0.033396</td>\n",
+       "      <td>biosupermarkets_count</td>\n",
+       "      <td>0.019531</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>8</th>\n",
+       "      <td>vegan_restaurants_count</td>\n",
+       "      <td>0.024088</td>\n",
+       "      <td>vegan_restaurants_count</td>\n",
+       "      <td>0.007623</td>\n",
+       "      <td>vegan_restaurants_count</td>\n",
+       "      <td>0.024088</td>\n",
+       "      <td>vegan_restaurants_count</td>\n",
+       "      <td>0.007623</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>9</th>\n",
+       "      <td>organic_restaurants_count</td>\n",
+       "      <td>0.009936</td>\n",
+       "      <td>organic_restaurants_count</td>\n",
+       "      <td>0.007419</td>\n",
+       "      <td>organic_restaurants_count</td>\n",
+       "      <td>0.009936</td>\n",
+       "      <td>organic_restaurants_count</td>\n",
+       "      <td>0.007419</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "                                       RF_Feature  RF_avg_Importance  \\\n",
+       "0                           all_restaurants_count           0.219926   \n",
+       "1                              supermarkets_count           0.186518   \n",
+       "2  Anteil der Sozial-wohnungen an allen Wohnungen           0.119212   \n",
+       "3                         Einwohner: innen je km²           0.118131   \n",
+       "4             Gesamtbetrag Einkünfte Median - [€]           0.114430   \n",
+       "5                                distance_rathaus           0.108481   \n",
+       "6                              greencrocers_count           0.065882   \n",
+       "7                           biosupermarkets_count           0.033396   \n",
+       "8                         vegan_restaurants_count           0.024088   \n",
+       "9                       organic_restaurants_count           0.009936   \n",
+       "\n",
+       "                                      GBT_Feature  GBT_avg_Importance  \\\n",
+       "0                           all_restaurants_count            0.291120   \n",
+       "1                              supermarkets_count            0.221202   \n",
+       "2             Gesamtbetrag Einkünfte Median - [€]            0.122629   \n",
+       "3  Anteil der Sozial-wohnungen an allen Wohnungen            0.111979   \n",
+       "4                                distance_rathaus            0.104781   \n",
+       "5                         Einwohner: innen je km²            0.070611   \n",
+       "6                              greencrocers_count            0.043105   \n",
+       "7                           biosupermarkets_count            0.019531   \n",
+       "8                         vegan_restaurants_count            0.007623   \n",
+       "9                       organic_restaurants_count            0.007419   \n",
+       "\n",
+       "                                   RFE_RF_Feature  RFE_RF_avg_Importance  \\\n",
+       "0                           all_restaurants_count               0.219926   \n",
+       "1                              supermarkets_count               0.186518   \n",
+       "2  Anteil der Sozial-wohnungen an allen Wohnungen               0.119212   \n",
+       "3                         Einwohner: innen je km²               0.118131   \n",
+       "4             Gesamtbetrag Einkünfte Median - [€]               0.114430   \n",
+       "5                                distance_rathaus               0.108481   \n",
+       "6                              greencrocers_count               0.065882   \n",
+       "7                           biosupermarkets_count               0.033396   \n",
+       "8                         vegan_restaurants_count               0.024088   \n",
+       "9                       organic_restaurants_count               0.009936   \n",
+       "\n",
+       "                                  RFE_GBT_Feature  RFE_GBT_avg_Importance  \n",
+       "0                           all_restaurants_count                0.291120  \n",
+       "1                              supermarkets_count                0.221202  \n",
+       "2             Gesamtbetrag Einkünfte Median - [€]                0.122629  \n",
+       "3  Anteil der Sozial-wohnungen an allen Wohnungen                0.111979  \n",
+       "4                                distance_rathaus                0.104781  \n",
+       "5                         Einwohner: innen je km²                0.070611  \n",
+       "6                              greencrocers_count                0.043105  \n",
+       "7                           biosupermarkets_count                0.019531  \n",
+       "8                         vegan_restaurants_count                0.007623  \n",
+       "9                       organic_restaurants_count                0.007419  "
+      ]
+     },
+     "execution_count": 66,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Combine the mean feature importances of all four models into one wide table.\n",
+    "# Each per-model frame has the columns 'Feature' and 'Mean_Importance'; they are\n",
+    "# renamed with a model prefix so the columns stay distinguishable after the concat.\n",
+    "#\n",
+    "# rename() is called WITHOUT inplace=True on purpose: df1..df4 would otherwise be plain\n",
+    "# aliases of the upstream *_feature_mean_importances_df frames, and an in-place rename\n",
+    "# would change their column names too, breaking earlier cells when they are re-run.\n",
+    "\n",
+    "df1 = rf_feature_mean_importances_df.rename(\n",
+    "    columns={'Feature': 'RF_Feature', 'Mean_Importance': 'RF_avg_Importance'})\n",
+    "\n",
+    "df2 = gbt_feature_mean_importances_df.rename(\n",
+    "    columns={'Feature': 'GBT_Feature', 'Mean_Importance': 'GBT_avg_Importance'})\n",
+    "\n",
+    "df3 = rfe1_feature_mean_importances_df.rename(\n",
+    "    columns={'Feature': 'RFE_RF_Feature', 'Mean_Importance': 'RFE_RF_avg_Importance'})\n",
+    "\n",
+    "df4 = rfe2_feature_mean_importances_df.rename(\n",
+    "    columns={'Feature': 'RFE_GBT_Feature', 'Mean_Importance': 'RFE_GBT_avg_Importance'})\n",
+    "\n",
+    "# save all 4 dataframes in a list\n",
+    "dfs = [df1, df2, df3, df4]\n",
+    "\n",
+    "# Place the four frames side by side (axis=1). Rows are aligned on the index label,\n",
+    "# so row i holds the i-th ranked feature of every model, not the same feature.\n",
+    "var_selections_df = pd.concat(dfs, axis=1, ignore_index=False)\n",
+    "\n",
+    "# Optional: switch to a 1-based rank index instead of the default 0-based one\n",
+    "#var_selections_df.index = range(1, 11)\n",
+    "var_selections_df"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 67,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>RF_Feature</th>\n",
+       "      <th>RF_avg_Importance</th>\n",
+       "      <th>GBT_Feature</th>\n",
+       "      <th>GBT_avg_Importance</th>\n",
+       "      <th>RFE_RF_Feature</th>\n",
+       "      <th>RFE_RF_avg_Importance</th>\n",
+       "      <th>RFE_GBT_Feature</th>\n",
+       "      <th>RFE_GBT_avg_Importance</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>RF_avg_Accuracy</td>\n",
+       "      <td>0.614500</td>\n",
+       "      <td>GBT_avg_Accuracy</td>\n",
+       "      <td>0.619500</td>\n",
+       "      <td>RFE_RF_avg_Accuracy</td>\n",
+       "      <td>0.614500</td>\n",
+       "      <td>RFE_GBT_avg_Accuracy</td>\n",
+       "      <td>0.619500</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>all_restaurants_count</td>\n",
+       "      <td>0.219926</td>\n",
+       "      <td>all_restaurants_count</td>\n",
+       "      <td>0.291120</td>\n",
+       "      <td>all_restaurants_count</td>\n",
+       "      <td>0.219926</td>\n",
+       "      <td>all_restaurants_count</td>\n",
+       "      <td>0.291120</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>supermarkets_count</td>\n",
+       "      <td>0.186518</td>\n",
+       "      <td>supermarkets_count</td>\n",
+       "      <td>0.221202</td>\n",
+       "      <td>supermarkets_count</td>\n",
+       "      <td>0.186518</td>\n",
+       "      <td>supermarkets_count</td>\n",
+       "      <td>0.221202</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>Anteil der Sozial-wohnungen an allen Wohnungen</td>\n",
+       "      <td>0.119212</td>\n",
+       "      <td>Gesamtbetrag Einkünfte Median - [€]</td>\n",
+       "      <td>0.122629</td>\n",
+       "      <td>Anteil der Sozial-wohnungen an allen Wohnungen</td>\n",
+       "      <td>0.119212</td>\n",
+       "      <td>Gesamtbetrag Einkünfte Median - [€]</td>\n",
+       "      <td>0.122629</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>Einwohner: innen je km²</td>\n",
+       "      <td>0.118131</td>\n",
+       "      <td>Anteil der Sozial-wohnungen an allen Wohnungen</td>\n",
+       "      <td>0.111979</td>\n",
+       "      <td>Einwohner: innen je km²</td>\n",
+       "      <td>0.118131</td>\n",
+       "      <td>Anteil der Sozial-wohnungen an allen Wohnungen</td>\n",
+       "      <td>0.111979</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>Gesamtbetrag Einkünfte Median - [€]</td>\n",
+       "      <td>0.114430</td>\n",
+       "      <td>distance_rathaus</td>\n",
+       "      <td>0.104781</td>\n",
+       "      <td>Gesamtbetrag Einkünfte Median - [€]</td>\n",
+       "      <td>0.114430</td>\n",
+       "      <td>distance_rathaus</td>\n",
+       "      <td>0.104781</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>5</th>\n",
+       "      <td>distance_rathaus</td>\n",
+       "      <td>0.108481</td>\n",
+       "      <td>Einwohner: innen je km²</td>\n",
+       "      <td>0.070611</td>\n",
+       "      <td>distance_rathaus</td>\n",
+       "      <td>0.108481</td>\n",
+       "      <td>Einwohner: innen je km²</td>\n",
+       "      <td>0.070611</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>6</th>\n",
+       "      <td>greencrocers_count</td>\n",
+       "      <td>0.065882</td>\n",
+       "      <td>greencrocers_count</td>\n",
+       "      <td>0.043105</td>\n",
+       "      <td>greencrocers_count</td>\n",
+       "      <td>0.065882</td>\n",
+       "      <td>greencrocers_count</td>\n",
+       "      <td>0.043105</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>7</th>\n",
+       "      <td>biosupermarkets_count</td>\n",
+       "      <td>0.033396</td>\n",
+       "      <td>biosupermarkets_count</td>\n",
+       "      <td>0.019531</td>\n",
+       "      <td>biosupermarkets_count</td>\n",
+       "      <td>0.033396</td>\n",
+       "      <td>biosupermarkets_count</td>\n",
+       "      <td>0.019531</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>8</th>\n",
+       "      <td>vegan_restaurants_count</td>\n",
+       "      <td>0.024088</td>\n",
+       "      <td>vegan_restaurants_count</td>\n",
+       "      <td>0.007623</td>\n",
+       "      <td>vegan_restaurants_count</td>\n",
+       "      <td>0.024088</td>\n",
+       "      <td>vegan_restaurants_count</td>\n",
+       "      <td>0.007623</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>9</th>\n",
+       "      <td>organic_restaurants_count</td>\n",
+       "      <td>0.009936</td>\n",
+       "      <td>organic_restaurants_count</td>\n",
+       "      <td>0.007419</td>\n",
+       "      <td>organic_restaurants_count</td>\n",
+       "      <td>0.009936</td>\n",
+       "      <td>organic_restaurants_count</td>\n",
+       "      <td>0.007419</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "                                       RF_Feature  RF_avg_Importance  \\\n",
+       "0                                 RF_avg_Accuracy           0.614500   \n",
+       "0                           all_restaurants_count           0.219926   \n",
+       "1                              supermarkets_count           0.186518   \n",
+       "2  Anteil der Sozial-wohnungen an allen Wohnungen           0.119212   \n",
+       "3                         Einwohner: innen je km²           0.118131   \n",
+       "4             Gesamtbetrag Einkünfte Median - [€]           0.114430   \n",
+       "5                                distance_rathaus           0.108481   \n",
+       "6                              greencrocers_count           0.065882   \n",
+       "7                           biosupermarkets_count           0.033396   \n",
+       "8                         vegan_restaurants_count           0.024088   \n",
+       "9                       organic_restaurants_count           0.009936   \n",
+       "\n",
+       "                                      GBT_Feature  GBT_avg_Importance  \\\n",
+       "0                                GBT_avg_Accuracy            0.619500   \n",
+       "0                           all_restaurants_count            0.291120   \n",
+       "1                              supermarkets_count            0.221202   \n",
+       "2             Gesamtbetrag Einkünfte Median - [€]            0.122629   \n",
+       "3  Anteil der Sozial-wohnungen an allen Wohnungen            0.111979   \n",
+       "4                                distance_rathaus            0.104781   \n",
+       "5                         Einwohner: innen je km²            0.070611   \n",
+       "6                              greencrocers_count            0.043105   \n",
+       "7                           biosupermarkets_count            0.019531   \n",
+       "8                         vegan_restaurants_count            0.007623   \n",
+       "9                       organic_restaurants_count            0.007419   \n",
+       "\n",
+       "                                   RFE_RF_Feature  RFE_RF_avg_Importance  \\\n",
+       "0                             RFE_RF_avg_Accuracy               0.614500   \n",
+       "0                           all_restaurants_count               0.219926   \n",
+       "1                              supermarkets_count               0.186518   \n",
+       "2  Anteil der Sozial-wohnungen an allen Wohnungen               0.119212   \n",
+       "3                         Einwohner: innen je km²               0.118131   \n",
+       "4             Gesamtbetrag Einkünfte Median - [€]               0.114430   \n",
+       "5                                distance_rathaus               0.108481   \n",
+       "6                              greencrocers_count               0.065882   \n",
+       "7                           biosupermarkets_count               0.033396   \n",
+       "8                         vegan_restaurants_count               0.024088   \n",
+       "9                       organic_restaurants_count               0.009936   \n",
+       "\n",
+       "                                  RFE_GBT_Feature  RFE_GBT_avg_Importance  \n",
+       "0                            RFE_GBT_avg_Accuracy                0.619500  \n",
+       "0                           all_restaurants_count                0.291120  \n",
+       "1                              supermarkets_count                0.221202  \n",
+       "2             Gesamtbetrag Einkünfte Median - [€]                0.122629  \n",
+       "3  Anteil der Sozial-wohnungen an allen Wohnungen                0.111979  \n",
+       "4                                distance_rathaus                0.104781  \n",
+       "5                         Einwohner: innen je km²                0.070611  \n",
+       "6                              greencrocers_count                0.043105  \n",
+       "7                           biosupermarkets_count                0.019531  \n",
+       "8                         vegan_restaurants_count                0.007623  \n",
+       "9                       organic_restaurants_count                0.007419  "
+      ]
+     },
+     "execution_count": 67,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "#----------------------------------------Add Accuracy of 4 models to the importance result----------------------------\n",
+    "# Build one extra row holding each model's average accuracy. The row label goes into\n",
+    "# the *_Feature column and the value into the matching *_avg_Importance column, so the\n",
+    "# keys are exactly the column names of var_selections_df.\n",
+    "accuracy_row = {'RF_Feature': 'RF_avg_Accuracy', 'RF_avg_Importance': rf_avg_accuracy,\n",
+    "                'GBT_Feature': 'GBT_avg_Accuracy', 'GBT_avg_Importance': gbt_avg_accuracy,\n",
+    "                'RFE_RF_Feature': 'RFE_RF_avg_Accuracy', 'RFE_RF_avg_Importance': rfe1_avg_accuracy,\n",
+    "                'RFE_GBT_Feature': 'RFE_GBT_avg_Accuracy', 'RFE_GBT_avg_Importance': rfe2_avg_accuracy}\n",
+    "\n",
+    "# Convert the accuracy row to a one-row DataFrame\n",
+    "accuracy_row_df = pd.DataFrame(accuracy_row, index=[0])\n",
+    "\n",
+    "# Put the accuracy row on top of the importance table. ignore_index=True renumbers the\n",
+    "# rows 0..n-1; without it the accuracy row and the first feature row both carry the\n",
+    "# index label 0, which makes label-based lookups such as .loc[0] return two rows.\n",
+    "var_accuracy_df = pd.concat([accuracy_row_df, var_selections_df], ignore_index=True)\n",
+    "\n",
+    "var_accuracy_df"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 68,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#------------------------Save the result as a csv file------------------------\n",
+    "\n",
+    "\n",
+    "# save as CSV file\n",
+    "#var_accuracy_df.to_csv('Result of variable selections for weekendmarkets(of all 4 methods with model accuracy).csv')"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.7"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
-- 
GitLab