diff --git a/Correlation/Corr_Pvalue_VIF_all_and_final_10_variables.ipynb b/Correlation/Corr_Pvalue_VIF_all_and_final_10_variables.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..4bad2b92815558cecb2e2c92d9e436ae0c60619b --- /dev/null +++ b/Correlation/Corr_Pvalue_VIF_all_and_final_10_variables.ipynb @@ -0,0 +1,2625 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd \n", + "import numpy as np \n", + "import matplotlib.pyplot as plt\n", + "\n", + "# Display more columns\n", + "pd.set_option('display.max_columns', 68)\n", + "#pd.set_option('display.max_rows', 101)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Put the \"final_data.csv\" for analysis" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>Anzahl der Einwohner: innen</th>\n", + " <th>Anzahl der Kinder und Jugendlichen unter 18 Jahren</th>\n", + " <th>Anteil Kinder und Jugendlicher unter 18 Jahren an der Gesamt-bevölkerung</th>\n", + " <th>Anzahl älterer Einwohner: innen über 64 Jahren</th>\n", + " <th>Anteil älterer Einwohner: innen über 64 Jahren an der Gesamt-bevölkerung</th>\n", + " <th>Anzahl der Haushalte</th>\n", + " <th>Durch-schnittliche Anzahl der Personen je Haushalt</th>\n", + " <th>Fläche in km²</th>\n", + " <th>Einwohner: innen je km²</th>\n", + " <th>Sozial-versicherungs-pflichtig Beschäftigte am Wohnort</th>\n", + " <th>Anteil sozial-versicherungs-pflichtig Beschäftigter am Wohnort an den Erwerbs-fähigen (15 bis unter 65-Jährige)</th>\n", + " <th>Anzahl der Arbeitslosen</th>\n", + " <th>Anzahl der Wohngebäude</th>\n", + " <th>Anzahl der Wohnungen</th>\n", + " <th>Durch-schnittliche Wohnungs-größe in m²</th>\n", + " <th>Durch-schnittliche Wohnfläche je Einwohner:in in m²</th>\n", + " <th>Anzahl der Sozial-wohnungen</th>\n", + " <th>Anteil der Sozial-wohnungen an allen Wohnungen</th>\n", + " <th>Durch-schnittlicher Immobilien-preis für ein Grundstück in EUR/m²</th>\n", + " <th>Durch-schnittlicher Immobilien-preis für eine Eigentums-wohnung in EUR/m²</th>\n", + " <th>Anzahl der Wohnungen in Ein- und Zweifamilien-häusern</th>\n", + " <th>Anteil der Wohnungen in Ein- und Zweifamilien-häusern an allen Wohnungen</th>\n", + " <th>Anzahl der Einpersonen-haushalte</th>\n", + " <th>Anteil der Haushalte, in denen nur eine Person lebt, an allen Haushalten</th>\n", + " <th>Gesamtbetrag der Einkünfte - [Steuerpflichtig]</th>\n", + " <th>Gesamtbetrag der Einkünfte - [1000€]</th>\n", + " <th>Festgesetzte Einkommenssteuer/ Jahreslohnsteuer - [1000€]</th>\n", + " <th>Gesamtbetrag Einkünfte Mittelwert - [€]</th>\n", + " <th>Gesamtbetrag Einkünfte Median - [€]</th>\n", + " <th>market_count</th>\n", + " <th>farms_count</th>\n", + " <th>greencrocers_count</th>\n", + " <th>supermarkets_count</th>\n", + " <th>biosupermarkets_count</th>\n", + " <th>all_restaurants_count</th>\n", + " <th>organic_restaurants_count</th>\n", + " <th>vegan_restaurants_count</th>\n", + " <th>art_score</th>\n", + " <th>distance_rathaus</th>\n", + " </tr>\n", + " <tr>\n", + " <th>stadtteil</th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>Hamburg-Altstadt</th>\n", + " <td>3182.0</td>\n", + " <td>515.0</td>\n", + " <td>16.2</td>\n", + " <td>316.0</td>\n", + " <td>9.9</td>\n", + " <td>1884.0</td>\n", + " <td>1.7</td>\n", + " <td>1.3</td>\n", + " <td>2447.0</td>\n", + " <td>1346.0</td>\n", + " <td>55.6</td>\n", + " <td>324.0</td>\n", + " <td>103.0</td>\n", + " <td>1487.0</td>\n", + " <td>74.1</td>\n", + " <td>34.6</td>\n", + " <td>176.0</td>\n", + " <td>11.8</td>\n", + " <td>2366.0</td>\n", + " <td>4869.0</td>\n", + " <td>17.0</td>\n", + " <td>1.1</td>\n", + " <td>1057.0</td>\n", + " <td>56.1</td>\n", + " <td>1952.0</td>\n", + " <td>61168.0</td>\n", + " <td>11577.0</td>\n", + " <td>31336.0</td>\n", + " <td>10811.0</td>\n", + " <td>2</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>4</td>\n", + " <td>0</td>\n", + " <td>129</td>\n", + " <td>0</td>\n", + " <td>4</td>\n", + " <td>2.565476</td>\n", + " <td>0.000000</td>\n", + " </tr>\n", + " <tr>\n", + " <th>HafenCity</th>\n", + " <td>6950.0</td>\n", + " <td>1386.0</td>\n", + " <td>19.9</td>\n", + " <td>644.0</td>\n", + " <td>9.3</td>\n", + " <td>3183.0</td>\n", + " <td>2.2</td>\n", + " <td>2.4</td>\n", + " <td>2865.0</td>\n", + " <td>3087.0</td>\n", + " <td>61.6</td>\n", + " <td>147.0</td>\n", + " <td>141.0</td>\n", + " <td>3898.0</td>\n", + " <td>81.4</td>\n", + " <td>45.7</td>\n", + " <td>1074.0</td>\n", + " <td>27.6</td>\n", + " <td>3031.0</td>\n", + " <td>10746.0</td>\n", + " <td>5.0</td>\n", + " <td>0.1</td>\n", + " <td>1126.0</td>\n", + " <td>35.4</td>\n", + " <td>1255.0</td>\n", + " <td>116973.0</td>\n", + " <td>34051.0</td>\n", + " <td>93206.0</td>\n", + " <td>57913.0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>6</td>\n", + " <td>1</td>\n", + " <td>56</td>\n", + " <td>3</td>\n", + " <td>1</td>\n", + " <td>1.952381</td>\n", + " <td>0.005758</td>\n", + " </tr>\n", + " <tr>\n", + " <th>Neustadt</th>\n", + " <td>12649.0</td>\n", + " <td>1412.0</td>\n", + " <td>11.2</td>\n", + " <td>1919.0</td>\n", + " <td>15.2</td>\n", + " <td>8683.0</td>\n", + " <td>1.5</td>\n", + " <td>2.3</td>\n", + " <td>5592.0</td>\n", + " <td>6350.0</td>\n", + " <td>66.9</td>\n", + " <td>493.0</td>\n", + " <td>652.0</td>\n", + " <td>7700.0</td>\n", + " <td>63.1</td>\n", + " <td>38.4</td>\n", + " <td>992.0</td>\n", + " <td>12.9</td>\n", + " <td>2304.0</td>\n", + " <td>8240.0</td>\n", + " <td>70.0</td>\n", + " <td>0.9</td>\n", + " <td>5994.0</td>\n", + " <td>69.0</td>\n", + " <td>7015.0</td>\n", + " <td>242164.0</td>\n", + " <td>46861.0</td>\n", + " <td>34521.0</td>\n", + " <td>24715.0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>2</td>\n", + " <td>6</td>\n", + " <td>1</td>\n", + " <td>140</td>\n", + " <td>4</td>\n", + " <td>4</td>\n", + " <td>2.702381</td>\n", + " <td>0.001174</td>\n", + " </tr>\n", + " <tr>\n", + " <th>St. Pauli</th>\n", + " <td>22056.0</td>\n", + " <td>2941.0</td>\n", + " <td>13.3</td>\n", + " <td>2270.0</td>\n", + " <td>10.3</td>\n", + " <td>14772.0</td>\n", + " <td>1.5</td>\n", + " <td>2.2</td>\n", + " <td>9836.0</td>\n", + " <td>9903.0</td>\n", + " <td>57.2</td>\n", + " <td>1535.0</td>\n", + " <td>1293.0</td>\n", + " <td>12667.0</td>\n", + " <td>64.2</td>\n", + " <td>36.9</td>\n", + " <td>1522.0</td>\n", + " <td>12.0</td>\n", + " <td>1998.0</td>\n", + " <td>7716.0</td>\n", + " <td>173.0</td>\n", + " <td>1.4</td>\n", + " <td>10184.0</td>\n", + " <td>68.9</td>\n", + " <td>11066.0</td>\n", + " <td>309596.0</td>\n", + " <td>55589.0</td>\n", + " <td>27977.0</td>\n", + " <td>19399.0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>10</td>\n", + " <td>0</td>\n", + " <td>109</td>\n", + " <td>1</td>\n", + " <td>2</td>\n", + " <td>1.851190</td>\n", + " <td>0.009631</td>\n", + " </tr>\n", + " <tr>\n", + " <th>St. Georg</th>\n", + " <td>12318.0</td>\n", + " <td>1420.0</td>\n", + " <td>11.5</td>\n", + " <td>1676.0</td>\n", + " <td>13.6</td>\n", + " <td>7720.0</td>\n", + " <td>1.6</td>\n", + " <td>1.8</td>\n", + " <td>6758.0</td>\n", + " <td>5424.0</td>\n", + " <td>57.6</td>\n", + " <td>659.0</td>\n", + " <td>561.0</td>\n", + " <td>6444.0</td>\n", + " <td>71.1</td>\n", + " <td>37.2</td>\n", + " <td>764.0</td>\n", + " <td>11.9</td>\n", + " <td>1996.0</td>\n", + " <td>7961.0</td>\n", + " <td>63.0</td>\n", + " <td>1.0</td>\n", + " <td>5043.0</td>\n", + " <td>65.3</td>\n", + " <td>5683.0</td>\n", + " <td>250742.0</td>\n", + " <td>58371.0</td>\n", + " <td>44121.0</td>\n", + " <td>27161.0</td>\n", + " <td>2</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>11</td>\n", + " <td>0</td>\n", + " <td>80</td>\n", + " <td>2</td>\n", + " <td>4</td>\n", + " <td>2.523810</td>\n", + " <td>0.008493</td>\n", + " </tr>\n", + " <tr>\n", + " <th>...</th>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>Hausbruch</th>\n", + " <td>16868.0</td>\n", + " <td>3196.0</td>\n", + " <td>18.9</td>\n", + " <td>3554.0</td>\n", + " <td>21.1</td>\n", + " <td>7829.0</td>\n", + " <td>2.1</td>\n", + " <td>9.8</td>\n", + " <td>1715.0</td>\n", + " <td>6492.0</td>\n", + " <td>60.8</td>\n", + " <td>758.0</td>\n", + " <td>3053.0</td>\n", + " <td>7323.0</td>\n", + " <td>85.0</td>\n", + " <td>36.9</td>\n", + " <td>1030.0</td>\n", + " <td>14.1</td>\n", + " <td>541.0</td>\n", + " <td>3877.0</td>\n", + " <td>2793.0</td>\n", + " <td>38.1</td>\n", + " <td>3134.0</td>\n", + " <td>40.0</td>\n", + " <td>7349.0</td>\n", + " <td>227990.0</td>\n", + " <td>36179.0</td>\n", + " <td>31023.0</td>\n", + " <td>21355.0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0.422619</td>\n", + " <td>0.104116</td>\n", + " </tr>\n", + " <tr>\n", + " <th>Neugraben-Fischbek</th>\n", + " <td>33963.0</td>\n", + " <td>7480.0</td>\n", + " <td>22.0</td>\n", + " <td>6318.0</td>\n", + " <td>18.6</td>\n", + " <td>15602.0</td>\n", + " <td>2.2</td>\n", + " <td>22.5</td>\n", + " <td>1508.0</td>\n", + " <td>12746.0</td>\n", + " <td>59.9</td>\n", + " <td>1493.0</td>\n", + " <td>6669.0</td>\n", + " <td>14755.0</td>\n", + " <td>87.0</td>\n", + " <td>37.8</td>\n", + " <td>1078.0</td>\n", + " <td>7.3</td>\n", + " <td>554.0</td>\n", + " <td>3912.0</td>\n", + " <td>6124.0</td>\n", + " <td>41.5</td>\n", + " <td>6247.0</td>\n", + " <td>40.0</td>\n", + " <td>12290.0</td>\n", + " <td>382231.0</td>\n", + " <td>60244.0</td>\n", + " <td>31101.0</td>\n", + " <td>22492.0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>4</td>\n", + " <td>1</td>\n", + " <td>8</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1.857143</td>\n", + " <td>0.130211</td>\n", + " </tr>\n", + " <tr>\n", + " <th>Francop</th>\n", + " <td>736.0</td>\n", + " <td>119.0</td>\n", + " <td>16.2</td>\n", + " <td>133.0</td>\n", + " <td>18.1</td>\n", + " <td>374.0</td>\n", + " <td>2.0</td>\n", + " <td>8.8</td>\n", + " <td>84.0</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>208.0</td>\n", + " <td>347.0</td>\n", + " <td>98.5</td>\n", + " <td>46.4</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>427.0</td>\n", + " <td>NaN</td>\n", + " <td>255.0</td>\n", + " <td>73.5</td>\n", + " <td>161.0</td>\n", + " <td>43.0</td>\n", + " <td>356.0</td>\n", + " <td>12738.0</td>\n", + " <td>2083.0</td>\n", + " <td>35782.0</td>\n", + " <td>26568.0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0.000000</td>\n", + " <td>0.114626</td>\n", + " </tr>\n", + " <tr>\n", + " <th>Neuenfelde</th>\n", + " <td>5245.0</td>\n", + " <td>1251.0</td>\n", + " <td>23.9</td>\n", + " <td>748.0</td>\n", + " <td>14.3</td>\n", + " <td>2337.0</td>\n", + " <td>2.2</td>\n", + " <td>15.7</td>\n", + " <td>335.0</td>\n", + " <td>1957.0</td>\n", + " <td>57.1</td>\n", + " <td>253.0</td>\n", + " <td>1103.0</td>\n", + " <td>2037.0</td>\n", + " <td>95.4</td>\n", + " <td>37.1</td>\n", + " <td>309.0</td>\n", + " <td>15.2</td>\n", + " <td>401.0</td>\n", + " <td>NaN</td>\n", + " <td>1261.0</td>\n", + " <td>61.9</td>\n", + " <td>1033.0</td>\n", + " <td>44.2</td>\n", + " <td>1909.0</td>\n", + " <td>62765.0</td>\n", + " <td>10422.0</td>\n", + " <td>32879.0</td>\n", + " <td>22909.0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>2</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0.375000</td>\n", + " <td>0.162791</td>\n", + " </tr>\n", + " <tr>\n", + " <th>Cranz</th>\n", + " <td>810.0</td>\n", + " <td>130.0</td>\n", + " <td>16.0</td>\n", + " <td>154.0</td>\n", + " <td>19.0</td>\n", + " <td>460.0</td>\n", + " <td>1.8</td>\n", + " <td>1.3</td>\n", + " <td>608.0</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>170.0</td>\n", + " <td>405.0</td>\n", + " <td>87.5</td>\n", + " <td>43.7</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>451.0</td>\n", + " <td>NaN</td>\n", + " <td>146.0</td>\n", + " <td>36.0</td>\n", + " <td>252.0</td>\n", + " <td>54.8</td>\n", + " <td>378.0</td>\n", + " <td>11845.0</td>\n", + " <td>2018.0</td>\n", + " <td>31335.0</td>\n", + " <td>22852.0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>2</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0.000000</td>\n", + " <td>0.200686</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "<p>99 rows × 39 columns</p>\n", + "</div>" + ], + "text/plain": [ + " Anzahl der Einwohner: innen \\\n", + "stadtteil \n", + "Hamburg-Altstadt 3182.0 \n", + "HafenCity 6950.0 \n", + "Neustadt 12649.0 \n", + "St. Pauli 22056.0 \n", + "St. Georg 12318.0 \n", + "... ... \n", + "Hausbruch 16868.0 \n", + "Neugraben-Fischbek 33963.0 \n", + "Francop 736.0 \n", + "Neuenfelde 5245.0 \n", + "Cranz 810.0 \n", + "\n", + " Anzahl der Kinder und Jugendlichen unter 18 Jahren \\\n", + "stadtteil \n", + "Hamburg-Altstadt 515.0 \n", + "HafenCity 1386.0 \n", + "Neustadt 1412.0 \n", + "St. Pauli 2941.0 \n", + "St. Georg 1420.0 \n", + "... ... \n", + "Hausbruch 3196.0 \n", + "Neugraben-Fischbek 7480.0 \n", + "Francop 119.0 \n", + "Neuenfelde 1251.0 \n", + "Cranz 130.0 \n", + "\n", + " Anteil Kinder und Jugendlicher unter 18 Jahren an der Gesamt-bevölkerung \\\n", + "stadtteil \n", + "Hamburg-Altstadt 16.2 \n", + "HafenCity 19.9 \n", + "Neustadt 11.2 \n", + "St. Pauli 13.3 \n", + "St. Georg 11.5 \n", + "... ... \n", + "Hausbruch 18.9 \n", + "Neugraben-Fischbek 22.0 \n", + "Francop 16.2 \n", + "Neuenfelde 23.9 \n", + "Cranz 16.0 \n", + "\n", + " Anzahl älterer Einwohner: innen über 64 Jahren \\\n", + "stadtteil \n", + "Hamburg-Altstadt 316.0 \n", + "HafenCity 644.0 \n", + "Neustadt 1919.0 \n", + "St. Pauli 2270.0 \n", + "St. Georg 1676.0 \n", + "... ... \n", + "Hausbruch 3554.0 \n", + "Neugraben-Fischbek 6318.0 \n", + "Francop 133.0 \n", + "Neuenfelde 748.0 \n", + "Cranz 154.0 \n", + "\n", + " Anteil älterer Einwohner: innen über 64 Jahren an der Gesamt-bevölkerung \\\n", + "stadtteil \n", + "Hamburg-Altstadt 9.9 \n", + "HafenCity 9.3 \n", + "Neustadt 15.2 \n", + "St. Pauli 10.3 \n", + "St. Georg 13.6 \n", + "... ... \n", + "Hausbruch 21.1 \n", + "Neugraben-Fischbek 18.6 \n", + "Francop 18.1 \n", + "Neuenfelde 14.3 \n", + "Cranz 19.0 \n", + "\n", + " Anzahl der Haushalte \\\n", + "stadtteil \n", + "Hamburg-Altstadt 1884.0 \n", + "HafenCity 3183.0 \n", + "Neustadt 8683.0 \n", + "St. Pauli 14772.0 \n", + "St. Georg 7720.0 \n", + "... ... \n", + "Hausbruch 7829.0 \n", + "Neugraben-Fischbek 15602.0 \n", + "Francop 374.0 \n", + "Neuenfelde 2337.0 \n", + "Cranz 460.0 \n", + "\n", + " Durch-schnittliche Anzahl der Personen je Haushalt \\\n", + "stadtteil \n", + "Hamburg-Altstadt 1.7 \n", + "HafenCity 2.2 \n", + "Neustadt 1.5 \n", + "St. Pauli 1.5 \n", + "St. Georg 1.6 \n", + "... ... \n", + "Hausbruch 2.1 \n", + "Neugraben-Fischbek 2.2 \n", + "Francop 2.0 \n", + "Neuenfelde 2.2 \n", + "Cranz 1.8 \n", + "\n", + " Fläche in km² Einwohner: innen je km² \\\n", + "stadtteil \n", + "Hamburg-Altstadt 1.3 2447.0 \n", + "HafenCity 2.4 2865.0 \n", + "Neustadt 2.3 5592.0 \n", + "St. Pauli 2.2 9836.0 \n", + "St. Georg 1.8 6758.0 \n", + "... ... ... \n", + "Hausbruch 9.8 1715.0 \n", + "Neugraben-Fischbek 22.5 1508.0 \n", + "Francop 8.8 84.0 \n", + "Neuenfelde 15.7 335.0 \n", + "Cranz 1.3 608.0 \n", + "\n", + " Sozial-versicherungs-pflichtig Beschäftigte am Wohnort \\\n", + "stadtteil \n", + "Hamburg-Altstadt 1346.0 \n", + "HafenCity 3087.0 \n", + "Neustadt 6350.0 \n", + "St. Pauli 9903.0 \n", + "St. Georg 5424.0 \n", + "... ... \n", + "Hausbruch 6492.0 \n", + "Neugraben-Fischbek 12746.0 \n", + "Francop NaN \n", + "Neuenfelde 1957.0 \n", + "Cranz NaN \n", + "\n", + " Anteil sozial-versicherungs-pflichtig Beschäftigter am Wohnort an den Erwerbs-fähigen (15 bis unter 65-Jährige) \\\n", + "stadtteil \n", + "Hamburg-Altstadt 55.6 \n", + "HafenCity 61.6 \n", + "Neustadt 66.9 \n", + "St. Pauli 57.2 \n", + "St. Georg 57.6 \n", + "... ... \n", + "Hausbruch 60.8 \n", + "Neugraben-Fischbek 59.9 \n", + "Francop NaN \n", + "Neuenfelde 57.1 \n", + "Cranz NaN \n", + "\n", + " Anzahl der Arbeitslosen Anzahl der Wohngebäude \\\n", + "stadtteil \n", + "Hamburg-Altstadt 324.0 103.0 \n", + "HafenCity 147.0 141.0 \n", + "Neustadt 493.0 652.0 \n", + "St. Pauli 1535.0 1293.0 \n", + "St. Georg 659.0 561.0 \n", + "... ... ... \n", + "Hausbruch 758.0 3053.0 \n", + "Neugraben-Fischbek 1493.0 6669.0 \n", + "Francop NaN 208.0 \n", + "Neuenfelde 253.0 1103.0 \n", + "Cranz NaN 170.0 \n", + "\n", + " Anzahl der Wohnungen \\\n", + "stadtteil \n", + "Hamburg-Altstadt 1487.0 \n", + "HafenCity 3898.0 \n", + "Neustadt 7700.0 \n", + "St. Pauli 12667.0 \n", + "St. Georg 6444.0 \n", + "... ... \n", + "Hausbruch 7323.0 \n", + "Neugraben-Fischbek 14755.0 \n", + "Francop 347.0 \n", + "Neuenfelde 2037.0 \n", + "Cranz 405.0 \n", + "\n", + " Durch-schnittliche Wohnungs-größe in m² \\\n", + "stadtteil \n", + "Hamburg-Altstadt 74.1 \n", + "HafenCity 81.4 \n", + "Neustadt 63.1 \n", + "St. Pauli 64.2 \n", + "St. Georg 71.1 \n", + "... ... \n", + "Hausbruch 85.0 \n", + "Neugraben-Fischbek 87.0 \n", + "Francop 98.5 \n", + "Neuenfelde 95.4 \n", + "Cranz 87.5 \n", + "\n", + " Durch-schnittliche Wohnfläche je Einwohner:in in m² \\\n", + "stadtteil \n", + "Hamburg-Altstadt 34.6 \n", + "HafenCity 45.7 \n", + "Neustadt 38.4 \n", + "St. Pauli 36.9 \n", + "St. Georg 37.2 \n", + "... ... \n", + "Hausbruch 36.9 \n", + "Neugraben-Fischbek 37.8 \n", + "Francop 46.4 \n", + "Neuenfelde 37.1 \n", + "Cranz 43.7 \n", + "\n", + " Anzahl der Sozial-wohnungen \\\n", + "stadtteil \n", + "Hamburg-Altstadt 176.0 \n", + "HafenCity 1074.0 \n", + "Neustadt 992.0 \n", + "St. Pauli 1522.0 \n", + "St. Georg 764.0 \n", + "... ... \n", + "Hausbruch 1030.0 \n", + "Neugraben-Fischbek 1078.0 \n", + "Francop 0.0 \n", + "Neuenfelde 309.0 \n", + "Cranz 0.0 \n", + "\n", + " Anteil der Sozial-wohnungen an allen Wohnungen \\\n", + "stadtteil \n", + "Hamburg-Altstadt 11.8 \n", + "HafenCity 27.6 \n", + "Neustadt 12.9 \n", + "St. Pauli 12.0 \n", + "St. Georg 11.9 \n", + "... ... \n", + "Hausbruch 14.1 \n", + "Neugraben-Fischbek 7.3 \n", + "Francop 0.0 \n", + "Neuenfelde 15.2 \n", + "Cranz 0.0 \n", + "\n", + " Durch-schnittlicher Immobilien-preis für ein Grundstück in EUR/m² \\\n", + "stadtteil \n", + "Hamburg-Altstadt 2366.0 \n", + "HafenCity 3031.0 \n", + "Neustadt 2304.0 \n", + "St. Pauli 1998.0 \n", + "St. Georg 1996.0 \n", + "... ... \n", + "Hausbruch 541.0 \n", + "Neugraben-Fischbek 554.0 \n", + "Francop 427.0 \n", + "Neuenfelde 401.0 \n", + "Cranz 451.0 \n", + "\n", + " Durch-schnittlicher Immobilien-preis für eine Eigentums-wohnung in EUR/m² \\\n", + "stadtteil \n", + "Hamburg-Altstadt 4869.0 \n", + "HafenCity 10746.0 \n", + "Neustadt 8240.0 \n", + "St. Pauli 7716.0 \n", + "St. Georg 7961.0 \n", + "... ... \n", + "Hausbruch 3877.0 \n", + "Neugraben-Fischbek 3912.0 \n", + "Francop NaN \n", + "Neuenfelde NaN \n", + "Cranz NaN \n", + "\n", + " Anzahl der Wohnungen in Ein- und Zweifamilien-häusern \\\n", + "stadtteil \n", + "Hamburg-Altstadt 17.0 \n", + "HafenCity 5.0 \n", + "Neustadt 70.0 \n", + "St. Pauli 173.0 \n", + "St. Georg 63.0 \n", + "... ... \n", + "Hausbruch 2793.0 \n", + "Neugraben-Fischbek 6124.0 \n", + "Francop 255.0 \n", + "Neuenfelde 1261.0 \n", + "Cranz 146.0 \n", + "\n", + " Anteil der Wohnungen in Ein- und Zweifamilien-häusern an allen Wohnungen \\\n", + "stadtteil \n", + "Hamburg-Altstadt 1.1 \n", + "HafenCity 0.1 \n", + "Neustadt 0.9 \n", + "St. Pauli 1.4 \n", + "St. Georg 1.0 \n", + "... ... \n", + "Hausbruch 38.1 \n", + "Neugraben-Fischbek 41.5 \n", + "Francop 73.5 \n", + "Neuenfelde 61.9 \n", + "Cranz 36.0 \n", + "\n", + " Anzahl der Einpersonen-haushalte \\\n", + "stadtteil \n", + "Hamburg-Altstadt 1057.0 \n", + "HafenCity 1126.0 \n", + "Neustadt 5994.0 \n", + "St. Pauli 10184.0 \n", + "St. Georg 5043.0 \n", + "... ... \n", + "Hausbruch 3134.0 \n", + "Neugraben-Fischbek 6247.0 \n", + "Francop 161.0 \n", + "Neuenfelde 1033.0 \n", + "Cranz 252.0 \n", + "\n", + " Anteil der Haushalte, in denen nur eine Person lebt, an allen Haushalten \\\n", + "stadtteil \n", + "Hamburg-Altstadt 56.1 \n", + "HafenCity 35.4 \n", + "Neustadt 69.0 \n", + "St. Pauli 68.9 \n", + "St. Georg 65.3 \n", + "... ... \n", + "Hausbruch 40.0 \n", + "Neugraben-Fischbek 40.0 \n", + "Francop 43.0 \n", + "Neuenfelde 44.2 \n", + "Cranz 54.8 \n", + "\n", + " Gesamtbetrag der Einkünfte - [Steuerpflichtig] \\\n", + "stadtteil \n", + "Hamburg-Altstadt 1952.0 \n", + "HafenCity 1255.0 \n", + "Neustadt 7015.0 \n", + "St. Pauli 11066.0 \n", + "St. Georg 5683.0 \n", + "... ... \n", + "Hausbruch 7349.0 \n", + "Neugraben-Fischbek 12290.0 \n", + "Francop 356.0 \n", + "Neuenfelde 1909.0 \n", + "Cranz 378.0 \n", + "\n", + " Gesamtbetrag der Einkünfte - [1000€] \\\n", + "stadtteil \n", + "Hamburg-Altstadt 61168.0 \n", + "HafenCity 116973.0 \n", + "Neustadt 242164.0 \n", + "St. Pauli 309596.0 \n", + "St. Georg 250742.0 \n", + "... ... \n", + "Hausbruch 227990.0 \n", + "Neugraben-Fischbek 382231.0 \n", + "Francop 12738.0 \n", + "Neuenfelde 62765.0 \n", + "Cranz 11845.0 \n", + "\n", + " Festgesetzte Einkommenssteuer/ Jahreslohnsteuer - [1000€] \\\n", + "stadtteil \n", + "Hamburg-Altstadt 11577.0 \n", + "HafenCity 34051.0 \n", + "Neustadt 46861.0 \n", + "St. Pauli 55589.0 \n", + "St. Georg 58371.0 \n", + "... ... \n", + "Hausbruch 36179.0 \n", + "Neugraben-Fischbek 60244.0 \n", + "Francop 2083.0 \n", + "Neuenfelde 10422.0 \n", + "Cranz 2018.0 \n", + "\n", + " Gesamtbetrag Einkünfte Mittelwert - [€] \\\n", + "stadtteil \n", + "Hamburg-Altstadt 31336.0 \n", + "HafenCity 93206.0 \n", + "Neustadt 34521.0 \n", + "St. Pauli 27977.0 \n", + "St. Georg 44121.0 \n", + "... ... \n", + "Hausbruch 31023.0 \n", + "Neugraben-Fischbek 31101.0 \n", + "Francop 35782.0 \n", + "Neuenfelde 32879.0 \n", + "Cranz 31335.0 \n", + "\n", + " Gesamtbetrag Einkünfte Median - [€] market_count \\\n", + "stadtteil \n", + "Hamburg-Altstadt 10811.0 2 \n", + "HafenCity 57913.0 1 \n", + "Neustadt 24715.0 1 \n", + "St. Pauli 19399.0 1 \n", + "St. Georg 27161.0 2 \n", + "... ... ... \n", + "Hausbruch 21355.0 0 \n", + "Neugraben-Fischbek 22492.0 1 \n", + "Francop 26568.0 0 \n", + "Neuenfelde 22909.0 0 \n", + "Cranz 22852.0 0 \n", + "\n", + " farms_count greencrocers_count supermarkets_count \\\n", + "stadtteil \n", + "Hamburg-Altstadt 0 1 4 \n", + "HafenCity 0 0 6 \n", + "Neustadt 0 2 6 \n", + "St. Pauli 0 1 10 \n", + "St. Georg 0 0 11 \n", + "... ... ... ... \n", + "Hausbruch 0 1 1 \n", + "Neugraben-Fischbek 0 0 4 \n", + "Francop 0 0 0 \n", + "Neuenfelde 0 1 0 \n", + "Cranz 0 0 0 \n", + "\n", + " biosupermarkets_count all_restaurants_count \\\n", + "stadtteil \n", + "Hamburg-Altstadt 0 129 \n", + "HafenCity 1 56 \n", + "Neustadt 1 140 \n", + "St. Pauli 0 109 \n", + "St. Georg 0 80 \n", + "... ... ... \n", + "Hausbruch 0 1 \n", + "Neugraben-Fischbek 1 8 \n", + "Francop 0 0 \n", + "Neuenfelde 0 2 \n", + "Cranz 0 2 \n", + "\n", + " organic_restaurants_count vegan_restaurants_count \\\n", + "stadtteil \n", + "Hamburg-Altstadt 0 4 \n", + "HafenCity 3 1 \n", + "Neustadt 4 4 \n", + "St. Pauli 1 2 \n", + "St. Georg 2 4 \n", + "... ... ... \n", + "Hausbruch 0 0 \n", + "Neugraben-Fischbek 0 0 \n", + "Francop 0 0 \n", + "Neuenfelde 0 0 \n", + "Cranz 0 0 \n", + "\n", + " art_score distance_rathaus \n", + "stadtteil \n", + "Hamburg-Altstadt 2.565476 0.000000 \n", + "HafenCity 1.952381 0.005758 \n", + "Neustadt 2.702381 0.001174 \n", + "St. Pauli 1.851190 0.009631 \n", + "St. Georg 2.523810 0.008493 \n", + "... ... ... \n", + "Hausbruch 0.422619 0.104116 \n", + "Neugraben-Fischbek 1.857143 0.130211 \n", + "Francop 0.000000 0.114626 \n", + "Neuenfelde 0.375000 0.162791 \n", + "Cranz 0.000000 0.200686 \n", + "\n", + "[99 rows x 39 columns]" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#path1 = \"C:/Users/HOME/Git/project-version-2-1/Data/final_data.csv\"\n", + "path1 = Data/final_data.csv\n", + "final_df = pd.read_csv(path1)\n", + "\n", + "final_df.columns.values[0] = \"stadtteil\"\n", + "final_df.set_index(final_df.columns[0], inplace=True)\n", + "final_df" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Corr_Pvalue_VIF for all 39 variables (after literature filtering)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "total number of missing values in merged_df: 0\n" + ] + } + ], + "source": [ + "#replace NaNs to column-average \n", + "\n", + "column_means = final_df.mean()\n", + "\n", + "merged_df = final_df.fillna(column_means)\n", + "\n", + "#check if there are left NaNs in merged_df\n", + "\n", + "total_missing_values = merged_df.isna().sum().sum()\n", + "\n", + "print(\"total number of missing values in merged_df:\", total_missing_values)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th></th>\n", + " <th>Correlation Coefficient</th>\n", + " <th>P-Value</th>\n", + " </tr>\n", + " <tr>\n", + " <th>Column 1</th>\n", + " <th>Column 2</th>\n", + " <th></th>\n", + " <th></th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th rowspan=\"5\" valign=\"top\">Anzahl der Einwohner: innen</th>\n", + " <th>Anzahl der Kinder und Jugendlichen unter 18 Jahren</th>\n", + " <td>0.967730</td>\n", + " <td>7.170216e-60</td>\n", + " </tr>\n", + " <tr>\n", + " <th>Anteil Kinder und Jugendlicher unter 18 Jahren an der Gesamt-bevölkerung</th>\n", + " <td>-0.130214</td>\n", + " <td>1.989182e-01</td>\n", + " </tr>\n", + " <tr>\n", + " <th>Anzahl älterer Einwohner: innen über 64 Jahren</th>\n", + " <td>0.946158</td>\n", + " <td>2.602575e-49</td>\n", + " </tr>\n", + " <tr>\n", + " <th>Anteil älterer Einwohner: innen über 64 Jahren an der Gesamt-bevölkerung</th>\n", + " <td>0.032310</td>\n", + " <td>7.508825e-01</td>\n", + " </tr>\n", + " <tr>\n", + " <th>Anzahl der Haushalte</th>\n", + " <td>0.982434</td>\n", + " <td>1.568244e-72</td>\n", + " </tr>\n", + " <tr>\n", + " <th>...</th>\n", + " <th>...</th>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " </tr>\n", + " <tr>\n", + " <th rowspan=\"5\" valign=\"top\">distance_rathaus</th>\n", + " <th>biosupermarkets_count</th>\n", + " <td>-0.197456</td>\n", + " <td>5.010748e-02</td>\n", + " </tr>\n", + " <tr>\n", + " <th>all_restaurants_count</th>\n", + " <td>-0.514299</td>\n", + " <td>5.175046e-08</td>\n", + " </tr>\n", + " <tr>\n", + " <th>organic_restaurants_count</th>\n", + " <td>-0.331365</td>\n", + " <td>8.070870e-04</td>\n", + " </tr>\n", + " <tr>\n", + " <th>vegan_restaurants_count</th>\n", + " <td>-0.440689</td>\n", + " <td>4.994682e-06</td>\n", + " </tr>\n", + " <tr>\n", + " <th>art_score</th>\n", + " <td>-0.301427</td>\n", + " <td>2.430041e-03</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "<p>1482 rows × 2 columns</p>\n", + "</div>" + ], + "text/plain": [ + " Correlation Coefficient \\\n", + "Column 1 Column 2 \n", + "Anzahl der Einwohner: innen Anzahl der Kinder und Jugendlichen unter 18 Jahren 0.967730 \n", + " Anteil Kinder und Jugendlicher unter 18 Jahren ... -0.130214 \n", + " Anzahl älterer Einwohner: innen über 64 Jahren 0.946158 \n", + " Anteil älterer Einwohner: innen über 64 Jahren ... 0.032310 \n", + " Anzahl der Haushalte 0.982434 \n", + "... ... \n", + "distance_rathaus biosupermarkets_count -0.197456 \n", + " all_restaurants_count -0.514299 \n", + " organic_restaurants_count -0.331365 \n", + " vegan_restaurants_count -0.440689 \n", + " art_score -0.301427 \n", + "\n", + " P-Value \n", + "Column 1 Column 2 \n", + "Anzahl der Einwohner: innen Anzahl der Kinder und Jugendlichen unter 18 Jahren 7.170216e-60 \n", + " Anteil Kinder und Jugendlicher unter 18 Jahren ... 1.989182e-01 \n", + " Anzahl älterer Einwohner: innen über 64 Jahren 2.602575e-49 \n", + " Anteil älterer Einwohner: innen über 64 Jahren ... 7.508825e-01 \n", + " Anzahl der Haushalte 1.568244e-72 \n", + "... ... \n", + "distance_rathaus biosupermarkets_count 5.010748e-02 \n", + " all_restaurants_count 5.175046e-08 \n", + " organic_restaurants_count 8.070870e-04 \n", + " vegan_restaurants_count 4.994682e-06 \n", + " art_score 2.430041e-03 \n", + "\n", + "[1482 rows x 2 columns]" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import matplotlib.pyplot as plt\n", + "from scipy.stats import pearsonr\n", + "\n", + "#Dataframe for computing Corr_Pvalue_VIF\n", + "#merged_df\n", + "\n", + "#check if there are left NaNs in merged_df\n", + "\n", + "#total_missing_values = merged_df.isna().sum().sum()\n", + "#print(\"total number of missing values in merged_df:\", total_missing_values)\n", + "\n", + "\n", + "#-------------------------making function for correlation & P-value--------------------------------- \n", + "\n", + "\n", + "def calculate_correlation_and_pvalue(df):\n", + " correlation_results = []\n", + "\n", + " for col1 in df.columns:\n", + " for col2 in df.columns:\n", + " if col1 != col2: # except combinatioln with the coulumn itself\n", + " correlation_coefficient, p_value = pearsonr(df[col1], df[col2])\n", + " result = {\n", + " 'Column 1': col1,\n", + " 'Column 2': col2,\n", + " 'Correlation Coefficient': correlation_coefficient,\n", + " 'P-Value': p_value\n", + " }\n", + " correlation_results.append(result)\n", + "\n", + " return pd.DataFrame(correlation_results)\n", + "\n", + "\n", + "# Compute correlation & p-value \n", + "corr_df = calculate_correlation_and_pvalue(merged_df)\n", + "corr_df = corr_df.set_index([corr_df.columns[0], corr_df.columns[1]])\n", + "corr_df" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " Variable VIF\n", + "0 Anzahl der Einwohner: innen 1.440139e+04\n", + "1 Anzahl der Kinder und Jugendlichen unter 18 Ja... 2.531331e+03\n", + "2 Anteil Kinder und Jugendlicher unter 18 Jahren... 3.036803e+02\n", + "3 Anzahl älterer Einwohner: innen über 64 Jahren 5.055914e+02\n", + "4 Anteil älterer Einwohner: innen über 64 Jahren... 1.236074e+02\n", + "5 Anzahl der Haushalte 2.408194e+04\n", + "6 Durch-schnittliche Anzahl der Personen je Haus... 1.800839e+03\n", + "7 Fläche in km² 8.274478e+00\n", + "8 Einwohner: innen je km² 1.312299e+01\n", + "9 Sozial-versicherungs-pflichtig Beschäftigte am... 1.552534e+02\n", + "10 Anteil sozial-versicherungs-pflichtig Beschäft... 1.054074e+03\n", + "11 Anzahl der Arbeitslosen 1.417910e+02\n", + "12 Anzahl der Wohngebäude 2.020616e+03\n", + "13 Anzahl der Wohnungen 3.534520e+03\n", + "14 Durch-schnittliche Wohnungs-größe in m² 1.361012e+03\n", + "15 Durch-schnittliche Wohnfläche je Einwohner:in ... 1.110014e+03\n", + "16 Anzahl der Sozial-wohnungen 2.460560e+01\n", + "17 Anteil der Sozial-wohnungen an allen Wohnungen 7.758257e+00\n", + "18 Durch-schnittlicher Immobilien-preis für ein G... 1.840842e+01\n", + "19 Durch-schnittlicher Immobilien-preis für eine ... 8.253178e+01\n", + "20 Anzahl der Wohnungen in Ein- und Zweifamilien-... 1.037326e+03\n", + "21 Anteil der Wohnungen in Ein- und Zweifamilien-... 5.948718e+01\n", + "22 Anzahl der Einpersonen-haushalte 6.734208e+03\n", + "23 Anteil der Haushalte, in denen nur eine Person... 2.626283e+02\n", + "24 Gesamtbetrag der Einkünfte - [Steuerpflichtig] 2.839704e+03\n", + "25 Gesamtbetrag der Einkünfte - [1000€] 5.222186e+03\n", + "26 Festgesetzte Einkommenssteuer/ Jahreslohnsteue... 1.929347e+03\n", + "27 Gesamtbetrag Einkünfte Mittelwert - [€] 1.527250e+02\n", + "28 Gesamtbetrag Einkünfte Median - [€] 2.140164e+02\n", + "29 market_count inf\n", + "30 farms_count 1.924990e+00\n", + "31 greencrocers_count inf\n", + "32 supermarkets_count inf\n", + "33 biosupermarkets_count inf\n", + "34 all_restaurants_count 1.992225e+01\n", + "35 organic_restaurants_count 4.292400e+00\n", + "36 vegan_restaurants_count 8.208440e+00\n", + "37 art_score inf\n", + "38 distance_rathaus 1.173467e+01\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "c:\\Users\\HOME\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\statsmodels\\stats\\outliers_influence.py:198: RuntimeWarning: divide by zero encountered in scalar divide\n", + " vif = 1. / (1. - r_squared_i)\n" + ] + } + ], + "source": [ + "#--------------------------------------------compute VIF-------------------------------------------\n", + "\n", + "from statsmodels.stats.outliers_influence import variance_inflation_factor\n", + "\n", + "# define VIF function\n", + "def calculate_vif(data_frame):\n", + " variables = data_frame.columns\n", + " vif_data = pd.DataFrame()\n", + " vif_data[\"Variable\"] = variables\n", + " vif_data[\"VIF\"] = [variance_inflation_factor(data_frame.values, i) for i in range(data_frame.shape[1])]\n", + " return vif_data\n", + "\n", + "# Compute VIF\n", + "vif_result = calculate_vif(merged_df)\n", + "print(vif_result)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Fianl Selected Variables for predicting Weekend Markets" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>market_count</th>\n", + " <th>greencrocers_count</th>\n", + " <th>supermarkets_count</th>\n", + " <th>biosupermarkets_count</th>\n", + " <th>all_restaurants_count</th>\n", + " <th>organic_restaurants_count</th>\n", + " <th>vegan_restaurants_count</th>\n", + " <th>Einwohner: innen je km²</th>\n", + " <th>Gesamtbetrag Einkünfte Median - [€]</th>\n", + " <th>Anteil der Sozial-wohnungen an allen Wohnungen</th>\n", + " <th>distance_rathaus</th>\n", + " </tr>\n", + " <tr>\n", + " <th>stadtteil</th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>Hamburg-Altstadt</th>\n", + " <td>2</td>\n", + " <td>1</td>\n", + " <td>4</td>\n", + " <td>0</td>\n", + " <td>129</td>\n", + " <td>0</td>\n", + " <td>4</td>\n", + " <td>2447.0</td>\n", + " <td>10811.0</td>\n", + " <td>11.8</td>\n", + " <td>0.000000</td>\n", + " </tr>\n", + " <tr>\n", + " <th>HafenCity</th>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>6</td>\n", + " <td>1</td>\n", + " <td>56</td>\n", + " <td>3</td>\n", + " <td>1</td>\n", + " <td>2865.0</td>\n", + " <td>57913.0</td>\n", + " <td>27.6</td>\n", + " <td>0.005758</td>\n", + " </tr>\n", + " <tr>\n", + " <th>Neustadt</th>\n", + " <td>1</td>\n", + " <td>2</td>\n", + " <td>6</td>\n", + " <td>1</td>\n", + " <td>140</td>\n", + " <td>4</td>\n", + " <td>4</td>\n", + " <td>5592.0</td>\n", + " <td>24715.0</td>\n", + " <td>12.9</td>\n", + " <td>0.001174</td>\n", + " </tr>\n", + " <tr>\n", + " <th>St. Pauli</th>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>10</td>\n", + " <td>0</td>\n", + " <td>109</td>\n", + " <td>1</td>\n", + " <td>2</td>\n", + " <td>9836.0</td>\n", + " <td>19399.0</td>\n", + " <td>12.0</td>\n", + " <td>0.009631</td>\n", + " </tr>\n", + " <tr>\n", + " <th>St. Georg</th>\n", + " <td>2</td>\n", + " <td>0</td>\n", + " <td>11</td>\n", + " <td>0</td>\n", + " <td>80</td>\n", + " <td>2</td>\n", + " <td>4</td>\n", + " <td>6758.0</td>\n", + " <td>27161.0</td>\n", + " <td>11.9</td>\n", + " <td>0.008493</td>\n", + " </tr>\n", + " <tr>\n", + " <th>...</th>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>Hausbruch</th>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1715.0</td>\n", + " <td>21355.0</td>\n", + " <td>14.1</td>\n", + " <td>0.104116</td>\n", + " </tr>\n", + " <tr>\n", + " <th>Neugraben-Fischbek</th>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>4</td>\n", + " <td>1</td>\n", + " <td>8</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1508.0</td>\n", + " <td>22492.0</td>\n", + " <td>7.3</td>\n", + " <td>0.130211</td>\n", + " </tr>\n", + " <tr>\n", + " <th>Francop</th>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>84.0</td>\n", + " <td>26568.0</td>\n", + " <td>0.0</td>\n", + " <td>0.114626</td>\n", + " </tr>\n", + " <tr>\n", + " <th>Neuenfelde</th>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>2</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>335.0</td>\n", + " <td>22909.0</td>\n", + " <td>15.2</td>\n", + " <td>0.162791</td>\n", + " </tr>\n", + " <tr>\n", + " <th>Cranz</th>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>2</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>608.0</td>\n", + " <td>22852.0</td>\n", + " <td>0.0</td>\n", + " <td>0.200686</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "<p>99 rows × 11 columns</p>\n", + "</div>" + ], + "text/plain": [ + " market_count greencrocers_count supermarkets_count \\\n", + "stadtteil \n", + "Hamburg-Altstadt 2 1 4 \n", + "HafenCity 1 0 6 \n", + "Neustadt 1 2 6 \n", + "St. Pauli 1 1 10 \n", + "St. Georg 2 0 11 \n", + "... ... ... ... \n", + "Hausbruch 0 1 1 \n", + "Neugraben-Fischbek 1 0 4 \n", + "Francop 0 0 0 \n", + "Neuenfelde 0 1 0 \n", + "Cranz 0 0 0 \n", + "\n", + " biosupermarkets_count all_restaurants_count \\\n", + "stadtteil \n", + "Hamburg-Altstadt 0 129 \n", + "HafenCity 1 56 \n", + "Neustadt 1 140 \n", + "St. Pauli 0 109 \n", + "St. Georg 0 80 \n", + "... ... ... \n", + "Hausbruch 0 1 \n", + "Neugraben-Fischbek 1 8 \n", + "Francop 0 0 \n", + "Neuenfelde 0 2 \n", + "Cranz 0 2 \n", + "\n", + " organic_restaurants_count vegan_restaurants_count \\\n", + "stadtteil \n", + "Hamburg-Altstadt 0 4 \n", + "HafenCity 3 1 \n", + "Neustadt 4 4 \n", + "St. Pauli 1 2 \n", + "St. Georg 2 4 \n", + "... ... ... \n", + "Hausbruch 0 0 \n", + "Neugraben-Fischbek 0 0 \n", + "Francop 0 0 \n", + "Neuenfelde 0 0 \n", + "Cranz 0 0 \n", + "\n", + " Einwohner: innen je km² \\\n", + "stadtteil \n", + "Hamburg-Altstadt 2447.0 \n", + "HafenCity 2865.0 \n", + "Neustadt 5592.0 \n", + "St. Pauli 9836.0 \n", + "St. Georg 6758.0 \n", + "... ... \n", + "Hausbruch 1715.0 \n", + "Neugraben-Fischbek 1508.0 \n", + "Francop 84.0 \n", + "Neuenfelde 335.0 \n", + "Cranz 608.0 \n", + "\n", + " Gesamtbetrag Einkünfte Median - [€] \\\n", + "stadtteil \n", + "Hamburg-Altstadt 10811.0 \n", + "HafenCity 57913.0 \n", + "Neustadt 24715.0 \n", + "St. Pauli 19399.0 \n", + "St. Georg 27161.0 \n", + "... ... \n", + "Hausbruch 21355.0 \n", + "Neugraben-Fischbek 22492.0 \n", + "Francop 26568.0 \n", + "Neuenfelde 22909.0 \n", + "Cranz 22852.0 \n", + "\n", + " Anteil der Sozial-wohnungen an allen Wohnungen \\\n", + "stadtteil \n", + "Hamburg-Altstadt 11.8 \n", + "HafenCity 27.6 \n", + "Neustadt 12.9 \n", + "St. Pauli 12.0 \n", + "St. Georg 11.9 \n", + "... ... \n", + "Hausbruch 14.1 \n", + "Neugraben-Fischbek 7.3 \n", + "Francop 0.0 \n", + "Neuenfelde 15.2 \n", + "Cranz 0.0 \n", + "\n", + " distance_rathaus \n", + "stadtteil \n", + "Hamburg-Altstadt 0.000000 \n", + "HafenCity 0.005758 \n", + "Neustadt 0.001174 \n", + "St. Pauli 0.009631 \n", + "St. Georg 0.008493 \n", + "... ... \n", + "Hausbruch 0.104116 \n", + "Neugraben-Fischbek 0.130211 \n", + "Francop 0.114626 \n", + "Neuenfelde 0.162791 \n", + "Cranz 0.200686 \n", + "\n", + "[99 rows x 11 columns]" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#---------------------- make the dataframe for final selected variables for weekend market prediction-------------------------\n", + "\n", + "\n", + "# Name of colums for weekend market prediction\n", + "columns_weekend = ['market_count','greencrocers_count' , 'supermarkets_count', 'biosupermarkets_count', 'all_restaurants_count', 'organic_restaurants_count',\n", + " 'vegan_restaurants_count', 'Einwohner: innen je km²', 'Gesamtbetrag Einkünfte Median - [€]', 'Anteil der Sozial-wohnungen an allen Wohnungen',\n", + " 'distance_rathaus']\n", + "\n", + "# Filter merged_df for these columns\n", + "final_weekend_df = merged_df[columns_weekend]\n", + "\n", + "final_weekend_df\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th></th>\n", + " <th>Correlation Coefficient</th>\n", + " <th>P-Value</th>\n", + " </tr>\n", + " <tr>\n", + " <th>Column 1</th>\n", + " <th>Column 2</th>\n", + " <th></th>\n", + " <th></th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th rowspan=\"5\" valign=\"top\">market_count</th>\n", + " <th>greencrocers_count</th>\n", + " <td>0.519896</td>\n", + " <td>3.492254e-08</td>\n", + " </tr>\n", + " <tr>\n", + " <th>supermarkets_count</th>\n", + " <td>0.664395</td>\n", + " <td>6.505037e-14</td>\n", + " </tr>\n", + " <tr>\n", + " <th>biosupermarkets_count</th>\n", + " <td>0.410297</td>\n", + " <td>2.466621e-05</td>\n", + " </tr>\n", + " <tr>\n", + " <th>all_restaurants_count</th>\n", + " <td>0.537170</td>\n", + " <td>9.909508e-09</td>\n", + " </tr>\n", + " <tr>\n", + " <th>organic_restaurants_count</th>\n", + " <td>0.143589</td>\n", + " <td>1.562180e-01</td>\n", + " </tr>\n", + " <tr>\n", + " <th>...</th>\n", + " <th>...</th>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " </tr>\n", + " <tr>\n", + " <th rowspan=\"5\" valign=\"top\">distance_rathaus</th>\n", + " <th>organic_restaurants_count</th>\n", + " <td>-0.331365</td>\n", + " <td>8.070870e-04</td>\n", + " </tr>\n", + " <tr>\n", + " <th>vegan_restaurants_count</th>\n", + " <td>-0.440689</td>\n", + " <td>4.994682e-06</td>\n", + " </tr>\n", + " <tr>\n", + " <th>Einwohner: innen je km²</th>\n", + " <td>-0.513015</td>\n", + " <td>5.658125e-08</td>\n", + " </tr>\n", + " <tr>\n", + " <th>Gesamtbetrag Einkünfte Median - [€]</th>\n", + " <td>0.229323</td>\n", + " <td>2.241430e-02</td>\n", + " </tr>\n", + " <tr>\n", + " <th>Anteil der Sozial-wohnungen an allen Wohnungen</th>\n", + " <td>-0.132204</td>\n", + " <td>1.920838e-01</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "<p>110 rows × 2 columns</p>\n", + "</div>" + ], + "text/plain": [ + " Correlation Coefficient \\\n", + "Column 1 Column 2 \n", + "market_count greencrocers_count 0.519896 \n", + " supermarkets_count 0.664395 \n", + " biosupermarkets_count 0.410297 \n", + " all_restaurants_count 0.537170 \n", + " organic_restaurants_count 0.143589 \n", + "... ... \n", + "distance_rathaus organic_restaurants_count -0.331365 \n", + " vegan_restaurants_count -0.440689 \n", + " Einwohner: innen je km² -0.513015 \n", + " Gesamtbetrag Einkünfte Median - [€] 0.229323 \n", + " Anteil der Sozial-wohnungen an allen Wohnungen -0.132204 \n", + "\n", + " P-Value \n", + "Column 1 Column 2 \n", + "market_count greencrocers_count 3.492254e-08 \n", + " supermarkets_count 6.505037e-14 \n", + " biosupermarkets_count 2.466621e-05 \n", + " all_restaurants_count 9.909508e-09 \n", + " organic_restaurants_count 1.562180e-01 \n", + "... ... \n", + "distance_rathaus organic_restaurants_count 8.070870e-04 \n", + " vegan_restaurants_count 4.994682e-06 \n", + " Einwohner: innen je km² 5.658125e-08 \n", + " Gesamtbetrag Einkünfte Median - [€] 2.241430e-02 \n", + " Anteil der Sozial-wohnungen an allen Wohnungen 1.920838e-01 \n", + "\n", + "[110 rows x 2 columns]" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#------------------------------------calculate Corr and Pvalue for variables of weekend market prediction--------------------\n", + "\n", + "# Compute correlation & p-value \n", + "weekend_corr_df = calculate_correlation_and_pvalue(final_weekend_df)\n", + "weekend_corr_df = weekend_corr_df.set_index([weekend_corr_df.columns[0], weekend_corr_df.columns[1]])\n", + "weekend_corr_df" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>Variable</th>\n", + " <th>VIF</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>market_count</td>\n", + " <td>4.470712</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>greencrocers_count</td>\n", + " <td>2.770428</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>supermarkets_count</td>\n", + " <td>5.223030</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>biosupermarkets_count</td>\n", + " <td>2.150442</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>all_restaurants_count</td>\n", + " <td>11.962411</td>\n", + " </tr>\n", + " <tr>\n", + " <th>5</th>\n", + " <td>organic_restaurants_count</td>\n", + " <td>2.391146</td>\n", + " </tr>\n", + " <tr>\n", + " <th>6</th>\n", + " <td>vegan_restaurants_count</td>\n", + " <td>5.502589</td>\n", + " </tr>\n", + " <tr>\n", + " <th>7</th>\n", + " <td>Einwohner: innen je km²</td>\n", + " <td>3.581137</td>\n", + " </tr>\n", + " <tr>\n", + " <th>8</th>\n", + " <td>Gesamtbetrag Einkünfte Median - [€]</td>\n", + " <td>8.177873</td>\n", + " </tr>\n", + " <tr>\n", + " <th>9</th>\n", + " <td>Anteil der Sozial-wohnungen an allen Wohnungen</td>\n", + " <td>1.452964</td>\n", + " </tr>\n", + " <tr>\n", + " <th>10</th>\n", + " <td>distance_rathaus</td>\n", + " <td>5.237429</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " Variable VIF\n", + "0 market_count 4.470712\n", + "1 greencrocers_count 2.770428\n", + "2 supermarkets_count 5.223030\n", + "3 biosupermarkets_count 2.150442\n", + "4 all_restaurants_count 11.962411\n", + "5 organic_restaurants_count 2.391146\n", + "6 vegan_restaurants_count 5.502589\n", + "7 Einwohner: innen je km² 3.581137\n", + "8 Gesamtbetrag Einkünfte Median - [€] 8.177873\n", + "9 Anteil der Sozial-wohnungen an allen Wohnungen 1.452964\n", + "10 distance_rathaus 5.237429" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#-------------------------calculate VIF for variables of weekend market prediction-----------------------\n", + "\n", + "# Compute VIF\n", + "weekend_vif_result = calculate_vif(final_weekend_df)\n", + "weekend_vif_result" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Fianl Selected Variables for predicting art_score" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>art_score</th>\n", + " <th>vegan_restaurants_count</th>\n", + " <th>organic_restaurants_count</th>\n", + " <th>Einwohner: innen je km²</th>\n", + " <th>distance_rathaus</th>\n", + " <th>Gesamtbetrag Einkünfte Median - [€]</th>\n", + " <th>Anteil Kinder und Jugendlicher unter 18 Jahren an der Gesamt-bevölkerung</th>\n", + " <th>Anteil älterer Einwohner: innen über 64 Jahren an der Gesamt-bevölkerung</th>\n", + " <th>Durch-schnittliche Wohnfläche je Einwohner:in in m²</th>\n", + " <th>Anteil der Sozial-wohnungen an allen Wohnungen</th>\n", + " <th>Durch-schnittliche Anzahl der Personen je Haushalt</th>\n", + " </tr>\n", + " <tr>\n", + " <th>stadtteil</th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>Hamburg-Altstadt</th>\n", + " <td>2.565476</td>\n", + " <td>4</td>\n", + " <td>0</td>\n", + " <td>2447.0</td>\n", + " <td>0.000000</td>\n", + " <td>10811.0</td>\n", + " <td>16.2</td>\n", + " <td>9.9</td>\n", + " <td>34.6</td>\n", + " <td>11.8</td>\n", + " <td>1.7</td>\n", + " </tr>\n", + " <tr>\n", + " <th>HafenCity</th>\n", + " <td>1.952381</td>\n", + " <td>1</td>\n", + " <td>3</td>\n", + " <td>2865.0</td>\n", + " <td>0.005758</td>\n", + " <td>57913.0</td>\n", + " <td>19.9</td>\n", + " <td>9.3</td>\n", + " <td>45.7</td>\n", + " <td>27.6</td>\n", + " <td>2.2</td>\n", + " </tr>\n", + " <tr>\n", + " <th>Neustadt</th>\n", + " <td>2.702381</td>\n", + " <td>4</td>\n", + " <td>4</td>\n", + " <td>5592.0</td>\n", + " <td>0.001174</td>\n", + " <td>24715.0</td>\n", + " <td>11.2</td>\n", + " <td>15.2</td>\n", + " <td>38.4</td>\n", + " <td>12.9</td>\n", + " <td>1.5</td>\n", + " </tr>\n", + " <tr>\n", + " <th>St. Pauli</th>\n", + " <td>1.851190</td>\n", + " <td>2</td>\n", + " <td>1</td>\n", + " <td>9836.0</td>\n", + " <td>0.009631</td>\n", + " <td>19399.0</td>\n", + " <td>13.3</td>\n", + " <td>10.3</td>\n", + " <td>36.9</td>\n", + " <td>12.0</td>\n", + " <td>1.5</td>\n", + " </tr>\n", + " <tr>\n", + " <th>St. Georg</th>\n", + " <td>2.523810</td>\n", + " <td>4</td>\n", + " <td>2</td>\n", + " <td>6758.0</td>\n", + " <td>0.008493</td>\n", + " <td>27161.0</td>\n", + " <td>11.5</td>\n", + " <td>13.6</td>\n", + " <td>37.2</td>\n", + " <td>11.9</td>\n", + " <td>1.6</td>\n", + " </tr>\n", + " <tr>\n", + " <th>...</th>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>Hausbruch</th>\n", + " <td>0.422619</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1715.0</td>\n", + " <td>0.104116</td>\n", + " <td>21355.0</td>\n", + " <td>18.9</td>\n", + " <td>21.1</td>\n", + " <td>36.9</td>\n", + " <td>14.1</td>\n", + " <td>2.1</td>\n", + " </tr>\n", + " <tr>\n", + " <th>Neugraben-Fischbek</th>\n", + " <td>1.857143</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1508.0</td>\n", + " <td>0.130211</td>\n", + " <td>22492.0</td>\n", + " <td>22.0</td>\n", + " <td>18.6</td>\n", + " <td>37.8</td>\n", + " <td>7.3</td>\n", + " <td>2.2</td>\n", + " </tr>\n", + " <tr>\n", + " <th>Francop</th>\n", + " <td>0.000000</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>84.0</td>\n", + " <td>0.114626</td>\n", + " <td>26568.0</td>\n", + " <td>16.2</td>\n", + " <td>18.1</td>\n", + " <td>46.4</td>\n", + " <td>0.0</td>\n", + " <td>2.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>Neuenfelde</th>\n", + " <td>0.375000</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>335.0</td>\n", + " <td>0.162791</td>\n", + " <td>22909.0</td>\n", + " <td>23.9</td>\n", + " <td>14.3</td>\n", + " <td>37.1</td>\n", + " <td>15.2</td>\n", + " <td>2.2</td>\n", + " </tr>\n", + " <tr>\n", + " <th>Cranz</th>\n", + " <td>0.000000</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>608.0</td>\n", + " <td>0.200686</td>\n", + " <td>22852.0</td>\n", + " <td>16.0</td>\n", + " <td>19.0</td>\n", + " <td>43.7</td>\n", + " <td>0.0</td>\n", + " <td>1.8</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "<p>99 rows × 11 columns</p>\n", + "</div>" + ], + "text/plain": [ + " art_score vegan_restaurants_count \\\n", + "stadtteil \n", + "Hamburg-Altstadt 2.565476 4 \n", + "HafenCity 1.952381 1 \n", + "Neustadt 2.702381 4 \n", + "St. Pauli 1.851190 2 \n", + "St. Georg 2.523810 4 \n", + "... ... ... \n", + "Hausbruch 0.422619 0 \n", + "Neugraben-Fischbek 1.857143 0 \n", + "Francop 0.000000 0 \n", + "Neuenfelde 0.375000 0 \n", + "Cranz 0.000000 0 \n", + "\n", + " organic_restaurants_count Einwohner: innen je km² \\\n", + "stadtteil \n", + "Hamburg-Altstadt 0 2447.0 \n", + "HafenCity 3 2865.0 \n", + "Neustadt 4 5592.0 \n", + "St. Pauli 1 9836.0 \n", + "St. Georg 2 6758.0 \n", + "... ... ... \n", + "Hausbruch 0 1715.0 \n", + "Neugraben-Fischbek 0 1508.0 \n", + "Francop 0 84.0 \n", + "Neuenfelde 0 335.0 \n", + "Cranz 0 608.0 \n", + "\n", + " distance_rathaus Gesamtbetrag Einkünfte Median - [€] \\\n", + "stadtteil \n", + "Hamburg-Altstadt 0.000000 10811.0 \n", + "HafenCity 0.005758 57913.0 \n", + "Neustadt 0.001174 24715.0 \n", + "St. Pauli 0.009631 19399.0 \n", + "St. Georg 0.008493 27161.0 \n", + "... ... ... \n", + "Hausbruch 0.104116 21355.0 \n", + "Neugraben-Fischbek 0.130211 22492.0 \n", + "Francop 0.114626 26568.0 \n", + "Neuenfelde 0.162791 22909.0 \n", + "Cranz 0.200686 22852.0 \n", + "\n", + " Anteil Kinder und Jugendlicher unter 18 Jahren an der Gesamt-bevölkerung \\\n", + "stadtteil \n", + "Hamburg-Altstadt 16.2 \n", + "HafenCity 19.9 \n", + "Neustadt 11.2 \n", + "St. Pauli 13.3 \n", + "St. Georg 11.5 \n", + "... ... \n", + "Hausbruch 18.9 \n", + "Neugraben-Fischbek 22.0 \n", + "Francop 16.2 \n", + "Neuenfelde 23.9 \n", + "Cranz 16.0 \n", + "\n", + " Anteil älterer Einwohner: innen über 64 Jahren an der Gesamt-bevölkerung \\\n", + "stadtteil \n", + "Hamburg-Altstadt 9.9 \n", + "HafenCity 9.3 \n", + "Neustadt 15.2 \n", + "St. Pauli 10.3 \n", + "St. Georg 13.6 \n", + "... ... \n", + "Hausbruch 21.1 \n", + "Neugraben-Fischbek 18.6 \n", + "Francop 18.1 \n", + "Neuenfelde 14.3 \n", + "Cranz 19.0 \n", + "\n", + " Durch-schnittliche Wohnfläche je Einwohner:in in m² \\\n", + "stadtteil \n", + "Hamburg-Altstadt 34.6 \n", + "HafenCity 45.7 \n", + "Neustadt 38.4 \n", + "St. Pauli 36.9 \n", + "St. Georg 37.2 \n", + "... ... \n", + "Hausbruch 36.9 \n", + "Neugraben-Fischbek 37.8 \n", + "Francop 46.4 \n", + "Neuenfelde 37.1 \n", + "Cranz 43.7 \n", + "\n", + " Anteil der Sozial-wohnungen an allen Wohnungen \\\n", + "stadtteil \n", + "Hamburg-Altstadt 11.8 \n", + "HafenCity 27.6 \n", + "Neustadt 12.9 \n", + "St. Pauli 12.0 \n", + "St. Georg 11.9 \n", + "... ... \n", + "Hausbruch 14.1 \n", + "Neugraben-Fischbek 7.3 \n", + "Francop 0.0 \n", + "Neuenfelde 15.2 \n", + "Cranz 0.0 \n", + "\n", + " Durch-schnittliche Anzahl der Personen je Haushalt \n", + "stadtteil \n", + "Hamburg-Altstadt 1.7 \n", + "HafenCity 2.2 \n", + "Neustadt 1.5 \n", + "St. Pauli 1.5 \n", + "St. Georg 1.6 \n", + "... ... \n", + "Hausbruch 2.1 \n", + "Neugraben-Fischbek 2.2 \n", + "Francop 2.0 \n", + "Neuenfelde 2.2 \n", + "Cranz 1.8 \n", + "\n", + "[99 rows x 11 columns]" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#---------------------- make the dataframe for final selected variables for art_score prediction-------------------------\n", + "\n", + "\n", + "# Name of colums for weekend market prediction\n", + "columns_art_score = ['art_score', 'vegan_restaurants_count', 'organic_restaurants_count', 'Einwohner: innen je km²', 'distance_rathaus',\n", + " 'Gesamtbetrag Einkünfte Median - [€]', 'Anteil Kinder und Jugendlicher unter 18 Jahren an der Gesamt-bevölkerung',\n", + " 'Anteil älterer Einwohner: innen über 64 Jahren an der Gesamt-bevölkerung', 'Durch-schnittliche Wohnfläche je Einwohner:in in m²',\n", + " 'Anteil der Sozial-wohnungen an allen Wohnungen', 'Durch-schnittliche Anzahl der Personen je Haushalt']\n", + "\n", + "# Filter merged_df for these columns\n", + "final_art_score_df = merged_df[columns_art_score]\n", + "\n", + "final_art_score_df\n" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th></th>\n", + " <th>Correlation Coefficient</th>\n", + " <th>P-Value</th>\n", + " </tr>\n", + " <tr>\n", + " <th>Column 1</th>\n", + " <th>Column 2</th>\n", + " <th></th>\n", + " <th></th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th rowspan=\"5\" valign=\"top\">art_score</th>\n", + " <th>vegan_restaurants_count</th>\n", + " <td>0.447469</td>\n", + " <td>3.423501e-06</td>\n", + " </tr>\n", + " <tr>\n", + " <th>organic_restaurants_count</th>\n", + " <td>0.327277</td>\n", + " <td>9.444683e-04</td>\n", + " </tr>\n", + " <tr>\n", + " <th>Einwohner: innen je km²</th>\n", + " <td>0.459954</td>\n", + " <td>1.671102e-06</td>\n", + " </tr>\n", + " <tr>\n", + " <th>distance_rathaus</th>\n", + " <td>-0.301427</td>\n", + " <td>2.430041e-03</td>\n", + " </tr>\n", + " <tr>\n", + " <th>Gesamtbetrag Einkünfte Median - [€]</th>\n", + " <td>0.022767</td>\n", + " <td>8.230083e-01</td>\n", + " </tr>\n", + " <tr>\n", + " <th>...</th>\n", + " <th>...</th>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " </tr>\n", + " <tr>\n", + " <th rowspan=\"5\" valign=\"top\">Durch-schnittliche Anzahl der Personen je Haushalt</th>\n", + " <th>Gesamtbetrag Einkünfte Median - [€]</th>\n", + " <td>0.333351</td>\n", + " <td>7.471834e-04</td>\n", + " </tr>\n", + " <tr>\n", + " <th>Anteil Kinder und Jugendlicher unter 18 Jahren an der Gesamt-bevölkerung</th>\n", + " <td>0.782907</td>\n", + " <td>1.042553e-21</td>\n", + " </tr>\n", + " <tr>\n", + " <th>Anteil älterer Einwohner: innen über 64 Jahren an der Gesamt-bevölkerung</th>\n", + " <td>0.483606</td>\n", + " <td>3.963795e-07</td>\n", + " </tr>\n", + " <tr>\n", + " <th>Durch-schnittliche Wohnfläche je Einwohner:in in m²</th>\n", + " <td>0.336097</td>\n", + " <td>6.710017e-04</td>\n", + " </tr>\n", + " <tr>\n", + " <th>Anteil der Sozial-wohnungen an allen Wohnungen</th>\n", + " <td>0.087142</td>\n", + " <td>3.910735e-01</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "<p>110 rows × 2 columns</p>\n", + "</div>" + ], + "text/plain": [ + " Correlation Coefficient \\\n", + "Column 1 Column 2 \n", + "art_score vegan_restaurants_count 0.447469 \n", + " organic_restaurants_count 0.327277 \n", + " Einwohner: innen je km² 0.459954 \n", + " distance_rathaus -0.301427 \n", + " Gesamtbetrag Einkünfte Median - [€] 0.022767 \n", + "... ... \n", + "Durch-schnittliche Anzahl der Personen je Haushalt Gesamtbetrag Einkünfte Median - [€] 0.333351 \n", + " Anteil Kinder und Jugendlicher unter 18 Jahren ... 0.782907 \n", + " Anteil älterer Einwohner: innen über 64 Jahren ... 0.483606 \n", + " Durch-schnittliche Wohnfläche je Einwohner:in i... 0.336097 \n", + " Anteil der Sozial-wohnungen an allen Wohnungen 0.087142 \n", + "\n", + " P-Value \n", + "Column 1 Column 2 \n", + "art_score vegan_restaurants_count 3.423501e-06 \n", + " organic_restaurants_count 9.444683e-04 \n", + " Einwohner: innen je km² 1.671102e-06 \n", + " distance_rathaus 2.430041e-03 \n", + " Gesamtbetrag Einkünfte Median - [€] 8.230083e-01 \n", + "... ... \n", + "Durch-schnittliche Anzahl der Personen je Haushalt Gesamtbetrag Einkünfte Median - [€] 7.471834e-04 \n", + " Anteil Kinder und Jugendlicher unter 18 Jahren ... 1.042553e-21 \n", + " Anteil älterer Einwohner: innen über 64 Jahren ... 3.963795e-07 \n", + " Durch-schnittliche Wohnfläche je Einwohner:in i... 6.710017e-04 \n", + " Anteil der Sozial-wohnungen an allen Wohnungen 3.910735e-01 \n", + "\n", + "[110 rows x 2 columns]" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#------------------------------------calculate Corr and Pvalue for variables of art_score prediction--------------------\n", + "\n", + "# Compute correlation & p-value \n", + "art_score_corr_df = calculate_correlation_and_pvalue(final_art_score_df)\n", + "art_score_corr_df = art_score_corr_df.set_index([art_score_corr_df.columns[0], art_score_corr_df.columns[1]])\n", + "art_score_corr_df" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>Variable</th>\n", + " <th>VIF</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>art_score</td>\n", + " <td>2.730099</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>vegan_restaurants_count</td>\n", + " <td>2.074623</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>organic_restaurants_count</td>\n", + " <td>1.796149</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>Einwohner: innen je km²</td>\n", + " <td>2.972563</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>distance_rathaus</td>\n", + " <td>7.462578</td>\n", + " </tr>\n", + " <tr>\n", + " <th>5</th>\n", + " <td>Gesamtbetrag Einkünfte Median - [€]</td>\n", + " <td>47.796667</td>\n", + " </tr>\n", + " <tr>\n", + " <th>6</th>\n", + " <td>Anteil Kinder und Jugendlicher unter 18 Jahren...</td>\n", + " <td>116.132614</td>\n", + " </tr>\n", + " <tr>\n", + " <th>7</th>\n", + " <td>Anteil älterer Einwohner: innen über 64 Jahren...</td>\n", + " <td>34.191178</td>\n", + " </tr>\n", + " <tr>\n", + " <th>8</th>\n", + " <td>Durch-schnittliche Wohnfläche je Einwohner:in ...</td>\n", + " <td>196.419521</td>\n", + " </tr>\n", + " <tr>\n", + " <th>9</th>\n", + " <td>Anteil der Sozial-wohnungen an allen Wohnungen</td>\n", + " <td>2.620196</td>\n", + " </tr>\n", + " <tr>\n", + " <th>10</th>\n", + " <td>Durch-schnittliche Anzahl der Personen je Haus...</td>\n", + " <td>322.273322</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " Variable VIF\n", + "0 art_score 2.730099\n", + "1 vegan_restaurants_count 2.074623\n", + "2 organic_restaurants_count 1.796149\n", + "3 Einwohner: innen je km² 2.972563\n", + "4 distance_rathaus 7.462578\n", + "5 Gesamtbetrag Einkünfte Median - [€] 47.796667\n", + "6 Anteil Kinder und Jugendlicher unter 18 Jahren... 116.132614\n", + "7 Anteil älterer Einwohner: innen über 64 Jahren... 34.191178\n", + "8 Durch-schnittliche Wohnfläche je Einwohner:in ... 196.419521\n", + "9 Anteil der Sozial-wohnungen an allen Wohnungen 2.620196\n", + "10 Durch-schnittliche Anzahl der Personen je Haus... 322.273322" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#-------------------------calculate VIF for variables of art_score prediction-----------------------\n", + "\n", + "# Compute VIF\n", + "art_score_vif_result = calculate_vif(final_art_score_df)\n", + "art_score_vif_result" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Making the result in an excel file" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [], + "source": [ + "#-------------------------- making the reult in one excel file--------------------------------------\n", + "#excel_file_path = 'Final_correlation_pvalue_VIF_with_count_per_stadtteil(99)_distance_to_rathaus.xlsx'\n", + "\n", + "#with pd.ExcelWriter(excel_file_path, engine='xlsxwriter') as writer:\n", + " ##each dataframe in different sheet\n", + " #corr_df.to_excel(writer, sheet_name='All_var_Corr_Pvalue', index=True)\n", + " #vif_result.to_excel(writer, sheet_name='All_var_VIF', index=False)\n", + " #weekend_corr_df.to_excel(writer, sheet_name='Weekend_Var_Corr_Pvalue', index=True)\n", + " #weekend_vif_result.to_excel(writer, sheet_name='Weekend_Var_VIF', index=False)\n", + " #art_score_corr_df.to_excel(writer, sheet_name='art_score_Var_Corr_Pvalue', index=True)\n", + " #art_score_vif_result.to_excel(writer, sheet_name='art_score_Var_VIF', index=False)\n", + " \n", + " " + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.7" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}