diff --git a/analysis.ipynb b/analysis.ipynb index be53ecbec08df1ef8a20a2a88ae14a786d0e1563..c6c1a1c81d2d069964e6baf5a9b269e3358bbd90 100644 --- a/analysis.ipynb +++ b/analysis.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 105, + "execution_count": 208, "metadata": {}, "outputs": [], "source": [ @@ -41,7 +41,7 @@ }, { "cell_type": "code", - "execution_count": 106, + "execution_count": 209, "metadata": {}, "outputs": [ { @@ -1054,7 +1054,7 @@ "[99 rows x 39 columns]" ] }, - "execution_count": 106, + "execution_count": 209, "metadata": {}, "output_type": "execute_result" } @@ -1069,6 +1069,7 @@ "# Drop column and set index\n", "data_df.set_index(\"Stadtteil\", inplace=True)\n", "\n", + "data2_df = data_df\n", "data_df" ] }, @@ -1081,7 +1082,7 @@ }, { "cell_type": "code", - "execution_count": 107, + "execution_count": 210, "metadata": {}, "outputs": [], "source": [ @@ -1115,7 +1116,7 @@ }, { "cell_type": "code", - "execution_count": 108, + "execution_count": 211, "metadata": {}, "outputs": [ { @@ -1415,7 +1416,7 @@ "max 4.000000 7.208333 0.277313 " ] }, - "execution_count": 108, + "execution_count": 211, "metadata": {}, "output_type": "execute_result" } @@ -1426,7 +1427,7 @@ }, { "cell_type": "code", - "execution_count": 109, + "execution_count": 212, "metadata": {}, "outputs": [ { @@ -1498,7 +1499,7 @@ }, { "cell_type": "code", - "execution_count": 110, + "execution_count": 213, "metadata": {}, "outputs": [ { @@ -1579,7 +1580,7 @@ }, { "cell_type": "code", - "execution_count": 111, + "execution_count": 214, "metadata": {}, "outputs": [ { @@ -1654,7 +1655,7 @@ }, { "cell_type": "code", - "execution_count": 112, + "execution_count": 215, "metadata": {}, "outputs": [ { @@ -1992,7 +1993,7 @@ }, { "cell_type": "code", - "execution_count": 113, + "execution_count": 216, "metadata": {}, "outputs": [ { @@ -2011,7 +2012,7 @@ "Text(0.5, 1.0, 'Density Plot of Art_Score ')" ] }, - "execution_count": 113, + "execution_count": 216, "metadata": {}, "output_type": "execute_result" }, @@ -2036,7 +2037,7 @@ }, { "cell_type": "code", - "execution_count": 114, + "execution_count": 217, "metadata": {}, "outputs": [ { @@ -2070,7 +2071,7 @@ }, { "cell_type": "code", - "execution_count": 115, + "execution_count": 218, "metadata": {}, "outputs": [ { @@ -2104,7 +2105,7 @@ }, { "cell_type": "code", - "execution_count": 116, + "execution_count": 219, "metadata": {}, "outputs": [ { @@ -2152,7 +2153,7 @@ }, { "cell_type": "code", - "execution_count": 117, + "execution_count": 220, "metadata": {}, "outputs": [ { @@ -2226,7 +2227,7 @@ }, { "cell_type": "code", - "execution_count": 118, + "execution_count": 221, "metadata": {}, "outputs": [ { @@ -2244,7 +2245,13 @@ "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:605: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n", " if is_sparse(pd_dtype):\n", "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:614: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n", - " if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):\n", + " if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ "/var/folders/lj/q1ndb2493275c2y0vzyplnk00000gn/T/ipykernel_25655/2287162676.py:19: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame.\n", "Try using .loc[row_indexer,col_indexer] = value instead\n", @@ -2292,7 +2299,7 @@ }, { "cell_type": "code", - "execution_count": 119, + "execution_count": 222, "metadata": {}, "outputs": [ { @@ -2333,7 +2340,7 @@ }, { "cell_type": "code", - "execution_count": 120, + "execution_count": 223, "metadata": {}, "outputs": [ { @@ -2428,12 +2435,12 @@ " <td>4</td>\n", " <td>2.565476</td>\n", " <td>0.000000</td>\n", - " <td>2</td>\n", + " <td>1</td>\n", " <td>0</td>\n", " <td>2</td>\n", - " <td>1</td>\n", " <td>0</td>\n", - " <td>1</td>\n", + " <td>4</td>\n", + " <td>0</td>\n", " </tr>\n", " <tr>\n", " <th>HafenCity</th>\n", @@ -2456,10 +2463,10 @@ " <td>0.005758</td>\n", " <td>1</td>\n", " <td>0</td>\n", - " <td>0</td>\n", " <td>1</td>\n", " <td>0</td>\n", - " <td>1</td>\n", + " <td>4</td>\n", + " <td>0</td>\n", " </tr>\n", " <tr>\n", " <th>Neustadt</th>\n", @@ -2480,12 +2487,12 @@ " <td>4</td>\n", " <td>2.702381</td>\n", " <td>0.001174</td>\n", - " <td>2</td>\n", + " <td>1</td>\n", " <td>0</td>\n", " <td>2</td>\n", - " <td>1</td>\n", " <td>0</td>\n", - " <td>1</td>\n", + " <td>4</td>\n", + " <td>0</td>\n", " </tr>\n", " <tr>\n", " <th>St. Pauli</th>\n", @@ -2506,12 +2513,12 @@ " <td>2</td>\n", " <td>1.851190</td>\n", " <td>0.009631</td>\n", - " <td>2</td>\n", + " <td>1</td>\n", " <td>0</td>\n", " <td>2</td>\n", - " <td>1</td>\n", " <td>0</td>\n", - " <td>1</td>\n", + " <td>4</td>\n", + " <td>0</td>\n", " </tr>\n", " <tr>\n", " <th>St. Georg</th>\n", @@ -2532,12 +2539,12 @@ " <td>4</td>\n", " <td>2.523810</td>\n", " <td>0.008493</td>\n", - " <td>2</td>\n", + " <td>1</td>\n", " <td>0</td>\n", " <td>2</td>\n", - " <td>1</td>\n", " <td>0</td>\n", - " <td>1</td>\n", + " <td>4</td>\n", + " <td>0</td>\n", " </tr>\n", " <tr>\n", " <th>...</th>\n", @@ -2586,10 +2593,10 @@ " <td>0.104116</td>\n", " <td>0</td>\n", " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", " <td>1</td>\n", " <td>0</td>\n", + " <td>3</td>\n", + " <td>1</td>\n", " </tr>\n", " <tr>\n", " <th>Neugraben-Fischbek</th>\n", @@ -2610,12 +2617,12 @@ " <td>0</td>\n", " <td>1.857143</td>\n", " <td>0.130211</td>\n", + " <td>0</td>\n", " <td>1</td>\n", " <td>1</td>\n", " <td>0</td>\n", + " <td>3</td>\n", " <td>1</td>\n", - " <td>2</td>\n", - " <td>0</td>\n", " </tr>\n", " <tr>\n", " <th>Francop</th>\n", @@ -2636,12 +2643,12 @@ " <td>0</td>\n", " <td>0.000000</td>\n", " <td>0.114626</td>\n", - " <td>0</td>\n", + " <td>2</td>\n", + " <td>1</td>\n", " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", " <td>1</td>\n", " <td>0</td>\n", + " <td>1</td>\n", " </tr>\n", " <tr>\n", " <th>Neuenfelde</th>\n", @@ -2664,10 +2671,10 @@ " <td>0.162791</td>\n", " <td>0</td>\n", " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", " <td>1</td>\n", " <td>0</td>\n", + " <td>3</td>\n", + " <td>1</td>\n", " </tr>\n", " <tr>\n", " <th>Cranz</th>\n", @@ -2688,12 +2695,12 @@ " <td>0</td>\n", " <td>0.000000</td>\n", " <td>0.200686</td>\n", - " <td>0</td>\n", + " <td>2</td>\n", + " <td>1</td>\n", " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", " <td>1</td>\n", " <td>0</td>\n", + " <td>1</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", @@ -2857,36 +2864,36 @@ "\n", " KMeans_5 KMeans_2 Spectral_5 Spectral_2 Agglo_5 \\\n", "Stadtteil \n", - "Hamburg-Altstadt 2 0 2 1 0 \n", - "HafenCity 1 0 0 1 0 \n", - "Neustadt 2 0 2 1 0 \n", - "St. Pauli 2 0 2 1 0 \n", - "St. Georg 2 0 2 1 0 \n", + "Hamburg-Altstadt 1 0 2 0 4 \n", + "HafenCity 1 0 1 0 4 \n", + "Neustadt 1 0 2 0 4 \n", + "St. Pauli 1 0 2 0 4 \n", + "St. Georg 1 0 2 0 4 \n", "... ... ... ... ... ... \n", - "Hausbruch 0 1 0 0 1 \n", - "Neugraben-Fischbek 1 1 0 1 2 \n", - "Francop 0 1 0 0 1 \n", - "Neuenfelde 0 1 0 0 1 \n", - "Cranz 0 1 0 0 1 \n", + "Hausbruch 0 1 1 0 3 \n", + "Neugraben-Fischbek 0 1 1 0 3 \n", + "Francop 2 1 1 1 0 \n", + "Neuenfelde 0 1 1 0 3 \n", + "Cranz 2 1 1 1 0 \n", "\n", " Agglo_2 \n", "Stadtteil \n", - "Hamburg-Altstadt 1 \n", - "HafenCity 1 \n", - "Neustadt 1 \n", - "St. Pauli 1 \n", - "St. Georg 1 \n", + "Hamburg-Altstadt 0 \n", + "HafenCity 0 \n", + "Neustadt 0 \n", + "St. Pauli 0 \n", + "St. Georg 0 \n", "... ... \n", - "Hausbruch 0 \n", - "Neugraben-Fischbek 0 \n", - "Francop 0 \n", - "Neuenfelde 0 \n", - "Cranz 0 \n", + "Hausbruch 1 \n", + "Neugraben-Fischbek 1 \n", + "Francop 1 \n", + "Neuenfelde 1 \n", + "Cranz 1 \n", "\n", "[99 rows x 23 columns]" ] }, - "execution_count": 120, + "execution_count": 223, "metadata": {}, "output_type": "execute_result" } @@ -2897,7 +2904,7 @@ }, { "cell_type": "code", - "execution_count": 121, + "execution_count": 224, "metadata": {}, "outputs": [], "source": [ @@ -2929,7 +2936,7 @@ }, { "cell_type": "code", - "execution_count": 122, + "execution_count": 225, "metadata": {}, "outputs": [ { @@ -2946,7 +2953,7 @@ " dtype='object')" ] }, - "execution_count": 122, + "execution_count": 225, "metadata": {}, "output_type": "execute_result" } @@ -2972,7 +2979,7 @@ }, { "cell_type": "code", - "execution_count": 123, + "execution_count": 226, "metadata": {}, "outputs": [ { @@ -3069,7 +3076,7 @@ }, { "cell_type": "code", - "execution_count": 124, + "execution_count": 227, "metadata": {}, "outputs": [ { @@ -3118,31 +3125,9 @@ }, { "cell_type": "code", - "execution_count": 125, + "execution_count": 228, "metadata": {}, "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Test Data Evaluation:\n", - "R² Score: -0.003523284549393324\n", - "Mean Absolute Error (MAE): 0.9425595238095239\n", - "Mean Squared Error (MSE): 1.3866904053287983\n", - "Root Mean Squared Error (RMSE): 1.1775781949954738\n", - " Coefficient\n", - "Anteil Kinder und Jugendlicher unter 18 Jahren ... 0.052760\n", - "Anteil älterer Einwohner: innen über 64 Jahren ... 0.008694\n", - "Einwohner: innen je km² 0.310062\n", - "Gesamtbetrag Einkünfte Median - [€] 0.124880\n", - "Anteil der Sozial-wohnungen an allen Wohnungen 0.026643\n", - "Durch-schnittliche Wohnfläche je Einwohner:in i... 0.027564\n", - "Durch-schnittliche Anzahl der Personen je Haushalt 0.080884\n", - "organic_restaurants_count 0.000000\n", - "vegan_restaurants_count 0.243526\n", - "distance_rathaus 0.124988\n" - ] - }, { "name": "stderr", "output_type": "stream", @@ -3176,7 +3161,35 @@ "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:605: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n", " if is_sparse(pd_dtype):\n", "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:614: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n", - " if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):\n", + " if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Test Data Evaluation:\n", + "R² Score: -0.4464530491116274\n", + "Mean Absolute Error (MAE): 1.0651785714285713\n", + "Mean Squared Error (MSE): 1.9987404336734695\n", + "Root Mean Squared Error (RMSE): 1.413768168291205\n", + " Coefficient\n", + "Anteil Kinder und Jugendlicher unter 18 Jahren ... 0.091625\n", + "Anteil älterer Einwohner: innen über 64 Jahren ... 0.066265\n", + "Einwohner: innen je km² 0.310324\n", + "Gesamtbetrag Einkünfte Median - [€] 0.023681\n", + "Anteil der Sozial-wohnungen an allen Wohnungen 0.030918\n", + "Durch-schnittliche Wohnfläche je Einwohner:in i... 0.126750\n", + "Durch-schnittliche Anzahl der Personen je Haushalt 0.041658\n", + "organic_restaurants_count 0.000007\n", + "vegan_restaurants_count 0.243526\n", + "distance_rathaus 0.065245\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:767: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n", " if not hasattr(array, \"sparse\") and array.dtypes.apply(is_sparse).any():\n", "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:605: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n", @@ -3247,7 +3260,7 @@ }, { "cell_type": "code", - "execution_count": 126, + "execution_count": 229, "metadata": {}, "outputs": [ { @@ -3350,7 +3363,7 @@ }, { "cell_type": "code", - "execution_count": 127, + "execution_count": 230, "metadata": {}, "outputs": [ { @@ -3457,7 +3470,7 @@ }, { "cell_type": "code", - "execution_count": 128, + "execution_count": 231, "metadata": {}, "outputs": [ { @@ -3474,7 +3487,7 @@ " dtype='object')" ] }, - "execution_count": 128, + "execution_count": 231, "metadata": {}, "output_type": "execute_result" } @@ -3490,7 +3503,7 @@ }, { "cell_type": "code", - "execution_count": 129, + "execution_count": 232, "metadata": {}, "outputs": [ { @@ -3894,7 +3907,13 @@ "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:614: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n", " if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):\n", "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/neural_network/_multilayer_perceptron.py:1623: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n", - " y = column_or_1d(y, warn=True)\n", + " y = column_or_1d(y, warn=True)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/neural_network/_multilayer_perceptron.py:686: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (10) reached and the optimization hasn't converged yet.\n", " warnings.warn(\n", "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:767: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n", @@ -8890,7 +8909,7 @@ }, { "cell_type": "code", - "execution_count": 130, + "execution_count": 233, "metadata": {}, "outputs": [ { @@ -8984,7 +9003,7 @@ "max 36.786614 6.065197 6.065197" ] }, - "execution_count": 130, + "execution_count": 233, "metadata": {}, "output_type": "execute_result" } @@ -8996,6 +9015,159 @@ "loo_mlp_results.describe()" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### MLP with all Variables" + ] + }, + { + "cell_type": "code", + "execution_count": 234, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Index(['Anzahl der Einwohner: innen',\n", + " 'Anzahl der Kinder und Jugendlichen unter 18 Jahren',\n", + " 'Anteil Kinder und Jugendlicher unter 18 Jahren an der Gesamt-bevölkerung',\n", + " 'Anzahl älterer Einwohner: innen über 64 Jahren',\n", + " 'Anteil älterer Einwohner: innen über 64 Jahren an der Gesamt-bevölkerung',\n", + " 'Anzahl der Haushalte',\n", + " 'Durch-schnittliche Anzahl der Personen je Haushalt', 'Fläche in km²',\n", + " 'Einwohner: innen je km²', 'Anzahl der Wohngebäude',\n", + " 'Anzahl der Wohnungen', 'Durch-schnittliche Wohnungs-größe in m²',\n", + " 'Durch-schnittliche Wohnfläche je Einwohner:in in m²',\n", + " 'Anzahl der Sozial-wohnungen',\n", + " 'Anteil der Sozial-wohnungen an allen Wohnungen',\n", + " 'Anzahl der Wohnungen in Ein- und Zweifamilien-häusern',\n", + " 'Anteil der Wohnungen in Ein- und Zweifamilien-häusern an allen Wohnungen',\n", + " 'Anzahl der Einpersonen-haushalte',\n", + " 'Anteil der Haushalte, in denen nur eine Person lebt, an allen Haushalten',\n", + " 'Gesamtbetrag der Einkünfte - [Steuerpflichtig]',\n", + " 'Gesamtbetrag der Einkünfte - [1000€]',\n", + " 'Festgesetzte Einkommenssteuer/ Jahreslohnsteuer - [1000€]',\n", + " 'Gesamtbetrag Einkünfte Mittelwert - [€]',\n", + " 'Gesamtbetrag Einkünfte Median - [€]', 'organic_restaurants_count',\n", + " 'vegan_restaurants_count', 'distance_rathaus'],\n", + " dtype='object')" + ] + }, + "execution_count": 234, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "y2 = data2_df[[\"art_score\"]]\n", + "X2 = data2_df.drop(['art_score','market_count', 'farms_count','greencrocers_count', 'supermarkets_count', \n", + "'biosupermarkets_count','all_restaurants_count', ], axis=1)\n", + "X2= X2.dropna(axis='columns')\n", + "X2.columns" + ] + }, + { + "cell_type": "code", + "execution_count": 235, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Test Data Evaluation:\n", + "R² Score: 0.15069178934954564\n", + "Mean Absolute Error (MAE): 0.7749609684397321\n", + "Mean Squared Error (MSE): 1.1735926460388846\n", + "Root Mean Squared Error (RMSE): 1.0833248109587839\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:767: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n", + " if not hasattr(array, \"sparse\") and array.dtypes.apply(is_sparse).any():\n", + "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:605: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n", + " if is_sparse(pd_dtype):\n", + "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:614: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n", + " if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):\n", + "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:767: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n", + " if not hasattr(array, \"sparse\") and array.dtypes.apply(is_sparse).any():\n", + "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:605: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n", + " if is_sparse(pd_dtype):\n", + "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:614: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n", + " if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):\n", + "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:767: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n", + " if not hasattr(array, \"sparse\") and array.dtypes.apply(is_sparse).any():\n", + "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:605: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n", + " if is_sparse(pd_dtype):\n", + "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:614: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n", + " if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):\n", + "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/neural_network/_multilayer_perceptron.py:1623: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n", + " y = column_or_1d(y, warn=True)\n", + "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/neural_network/_multilayer_perceptron.py:686: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (10) reached and the optimization hasn't converged yet.\n", + " warnings.warn(\n", + "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:767: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n", + " if not hasattr(array, \"sparse\") and array.dtypes.apply(is_sparse).any():\n", + "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:605: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n", + " if is_sparse(pd_dtype):\n", + "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:614: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n", + " if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):\n", + "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:767: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n", + " if not hasattr(array, \"sparse\") and array.dtypes.apply(is_sparse).any():\n", + "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:605: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n", + " if is_sparse(pd_dtype):\n", + "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:614: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n", + " if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):\n", + "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:767: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n", + " if not hasattr(array, \"sparse\") and array.dtypes.apply(is_sparse).any():\n", + "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:605: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n", + " if is_sparse(pd_dtype):\n", + "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:614: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n", + " if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):\n", + "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:767: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n", + " if not hasattr(array, \"sparse\") and array.dtypes.apply(is_sparse).any():\n", + "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:605: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n", + " if is_sparse(pd_dtype):\n", + "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:614: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n", + " if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):\n", + "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:767: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n", + " if not hasattr(array, \"sparse\") and array.dtypes.apply(is_sparse).any():\n", + "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:605: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n", + " if is_sparse(pd_dtype):\n", + "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:614: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n", + " if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):\n" + ] + } + ], + "source": [ + "## ------------------- Pipeline ------------------------ ##\n", + "\n", + "# train test split\n", + "X_train, X_test, y_train, y_test = train_test_split(X2, y2, test_size=0.2, random_state=42)\n", + "\n", + "# Pipeline - combine models \n", + "pipeline = Pipeline([('scaler', StandardScaler()), ('MLP', MLPRegressor(hidden_layer_sizes=(100,), activation='relu', solver='adam', max_iter=10, random_state=42))])\n", + "\n", + "pipeline.fit(X_train, y_train)\n", + "\n", + "y_pred_test = pipeline.predict(X_test)\n", + "\n", + "r2_score_test = pipeline.score(X_test, y_test)\n", + "mse_test = mean_squared_error(y_test, y_pred_test)\n", + "mae_test = mean_absolute_error(y_test, y_pred_test)\n", + "rmse_test = np.sqrt(mse_test)\n", + "\n", + "print(\"Test Data Evaluation:\")\n", + "print(\"R² Score:\", r2_score_test)\n", + "print(\"Mean Absolute Error (MAE):\", mae_test)\n", + "print(\"Mean Squared Error (MSE):\", mse_test)\n", + "print(\"Root Mean Squared Error (RMSE):\", rmse_test)" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -9005,7 +9177,7 @@ }, { "cell_type": "code", - "execution_count": 131, + "execution_count": 236, "metadata": {}, "outputs": [ { @@ -9019,7 +9191,7 @@ " dtype='object')" ] }, - "execution_count": 131, + "execution_count": 236, "metadata": {}, "output_type": "execute_result" } @@ -9049,7 +9221,7 @@ }, { "cell_type": "code", - "execution_count": 132, + "execution_count": 237, "metadata": {}, "outputs": [ { @@ -9256,7 +9428,7 @@ }, { "cell_type": "code", - "execution_count": 133, + "execution_count": 238, "metadata": {}, "outputs": [ { @@ -9329,7 +9501,7 @@ }, { "cell_type": "code", - "execution_count": 134, + "execution_count": 239, "metadata": {}, "outputs": [ { @@ -9406,7 +9578,7 @@ }, { "cell_type": "code", - "execution_count": 135, + "execution_count": 240, "metadata": {}, "outputs": [ { @@ -9415,15 +9587,15 @@ "text": [ " precision recall f1-score support\n", "\n", - " 0 0.42 0.18 0.25 45\n", - " 1 0.45 0.23 0.30 40\n", - " 2 0.12 0.22 0.15 9\n", - " 3 0.00 0.00 0.00 4\n", + " 0 0.59 0.22 0.32 45\n", + " 1 0.35 0.23 0.27 40\n", + " 2 0.05 0.11 0.06 9\n", + " 3 0.11 0.50 0.18 4\n", " 4 0.00 0.00 0.00 1\n", "\n", - " accuracy 0.19 99\n", - " macro avg 0.20 0.12 0.14 99\n", - "weighted avg 0.38 0.19 0.25 99\n", + " accuracy 0.22 99\n", + " macro avg 0.22 0.21 0.17 99\n", + "weighted avg 0.42 0.22 0.27 99\n", "\n" ] }, @@ -9465,7 +9637,7 @@ }, { "cell_type": "code", - "execution_count": 136, + "execution_count": 241, "metadata": {}, "outputs": [ { @@ -9491,7 +9663,7 @@ }, { "cell_type": "code", - "execution_count": 137, + "execution_count": 242, "metadata": {}, "outputs": [ { @@ -9708,7 +9880,7 @@ }, { "cell_type": "code", - "execution_count": 138, + "execution_count": 243, "metadata": {}, "outputs": [ { @@ -9762,7 +9934,7 @@ }, { "cell_type": "code", - "execution_count": 139, + "execution_count": 244, "metadata": {}, "outputs": [ { @@ -9776,7 +9948,7 @@ " dtype='object')" ] }, - "execution_count": 139, + "execution_count": 244, "metadata": {}, "output_type": "execute_result" } @@ -9808,7 +9980,7 @@ }, { "cell_type": "code", - "execution_count": 140, + "execution_count": 245, "metadata": {}, "outputs": [ { @@ -9816,7 +9988,7 @@ "output_type": "stream", "text": [ "Training Set Accuracy: 1.0\n", - "Test Set Accuracy: 0.45\n", + "Test Set Accuracy: 0.5\n", "\n", "Training Set Classification Report:\n", " precision recall f1-score support\n", @@ -9835,15 +10007,14 @@ "Test Set Classification Report:\n", " precision recall f1-score support\n", "\n", - " 0 0.70 0.78 0.74 9\n", - " 1 0.33 0.29 0.31 7\n", + " 0 0.78 0.78 0.78 9\n", + " 1 0.38 0.43 0.40 7\n", " 2 0.00 0.00 0.00 4\n", - " 3 0.00 0.00 0.00 0\n", " 4 0.00 0.00 0.00 0\n", "\n", - " accuracy 0.45 20\n", - " macro avg 0.21 0.21 0.21 20\n", - "weighted avg 0.43 0.45 0.44 20\n", + " accuracy 0.50 20\n", + " macro avg 0.29 0.30 0.29 20\n", + "weighted avg 0.48 0.50 0.49 20\n", "\n", "\n", "Model Parameters: {'memory': None, 'steps': [('scaler', StandardScaler()), ('DTC', DecisionTreeClassifier())], 'verbose': False, 'scaler': StandardScaler(), 'DTC': DecisionTreeClassifier(), 'scaler__copy': True, 'scaler__with_mean': True, 'scaler__with_std': True, 'DTC__ccp_alpha': 0.0, 'DTC__class_weight': None, 'DTC__criterion': 'gini', 'DTC__max_depth': None, 'DTC__max_features': None, 'DTC__max_leaf_nodes': None, 'DTC__min_impurity_decrease': 0.0, 'DTC__min_samples_leaf': 1, 'DTC__min_samples_split': 2, 'DTC__min_weight_fraction_leaf': 0.0, 'DTC__random_state': None, 'DTC__splitter': 'best'}\n" @@ -10013,7 +10184,7 @@ }, { "cell_type": "code", - "execution_count": 141, + "execution_count": 246, "metadata": {}, "outputs": [ { @@ -10027,7 +10198,7 @@ " dtype='object')" ] }, - "execution_count": 141, + "execution_count": 246, "metadata": {}, "output_type": "execute_result" } @@ -10059,9 +10230,41 @@ }, { "cell_type": "code", - "execution_count": 142, + "execution_count": 247, "metadata": {}, "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Training Set Accuracy: 1.0\n", + "Test Set Accuracy: 0.85\n", + "\n", + "Training Set Classification Report:\n", + " precision recall f1-score support\n", + "\n", + " 0 1.00 1.00 1.00 36\n", + " 1 1.00 1.00 1.00 43\n", + "\n", + " accuracy 1.00 79\n", + " macro avg 1.00 1.00 1.00 79\n", + "weighted avg 1.00 1.00 1.00 79\n", + "\n", + "\n", + "Test Set Classification Report:\n", + " precision recall f1-score support\n", + "\n", + " 0 0.88 0.78 0.82 9\n", + " 1 0.83 0.91 0.87 11\n", + "\n", + " accuracy 0.85 20\n", + " macro avg 0.85 0.84 0.85 20\n", + "weighted avg 0.85 0.85 0.85 20\n", + "\n", + "\n", + "Model Parameters: {'memory': None, 'steps': [('scaler', StandardScaler()), ('DTC', DecisionTreeClassifier())], 'verbose': False, 'scaler': StandardScaler(), 'DTC': DecisionTreeClassifier(), 'scaler__copy': True, 'scaler__with_mean': True, 'scaler__with_std': True, 'DTC__ccp_alpha': 0.0, 'DTC__class_weight': None, 'DTC__criterion': 'gini', 'DTC__max_depth': None, 'DTC__max_features': None, 'DTC__max_leaf_nodes': None, 'DTC__min_impurity_decrease': 0.0, 'DTC__min_samples_leaf': 1, 'DTC__min_samples_split': 2, 'DTC__min_weight_fraction_leaf': 0.0, 'DTC__random_state': None, 'DTC__splitter': 'best'}\n" + ] + }, { "name": "stderr", "output_type": "stream", @@ -10181,38 +10384,6 @@ "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:614: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n", " if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):\n" ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Training Set Accuracy: 1.0\n", - "Test Set Accuracy: 0.85\n", - "\n", - "Training Set Classification Report:\n", - " precision recall f1-score support\n", - "\n", - " 0 1.00 1.00 1.00 36\n", - " 1 1.00 1.00 1.00 43\n", - "\n", - " accuracy 1.00 79\n", - " macro avg 1.00 1.00 1.00 79\n", - "weighted avg 1.00 1.00 1.00 79\n", - "\n", - "\n", - "Test Set Classification Report:\n", - " precision recall f1-score support\n", - "\n", - " 0 0.88 0.78 0.82 9\n", - " 1 0.83 0.91 0.87 11\n", - "\n", - " accuracy 0.85 20\n", - " macro avg 0.85 0.84 0.85 20\n", - "weighted avg 0.85 0.85 0.85 20\n", - "\n", - "\n", - "Model Parameters: {'memory': None, 'steps': [('scaler', StandardScaler()), ('DTC', DecisionTreeClassifier())], 'verbose': False, 'scaler': StandardScaler(), 'DTC': DecisionTreeClassifier(), 'scaler__copy': True, 'scaler__with_mean': True, 'scaler__with_std': True, 'DTC__ccp_alpha': 0.0, 'DTC__class_weight': None, 'DTC__criterion': 'gini', 'DTC__max_depth': None, 'DTC__max_features': None, 'DTC__max_leaf_nodes': None, 'DTC__min_impurity_decrease': 0.0, 'DTC__min_samples_leaf': 1, 'DTC__min_samples_split': 2, 'DTC__min_weight_fraction_leaf': 0.0, 'DTC__random_state': None, 'DTC__splitter': 'best'}\n" - ] } ], "source": [ @@ -10259,7 +10430,7 @@ }, { "cell_type": "code", - "execution_count": 143, + "execution_count": 248, "metadata": {}, "outputs": [ { @@ -10273,7 +10444,7 @@ " dtype='object')" ] }, - "execution_count": 143, + "execution_count": 248, "metadata": {}, "output_type": "execute_result" } @@ -10286,16 +10457,33 @@ " 'Agglo_5', 'Agglo_2', 'market_yes_no'], axis=1)\n", "\n", "\n", - "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)\n", "\n", "X.columns" ] }, { "cell_type": "code", - "execution_count": 144, + "execution_count": 249, "metadata": {}, "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:767: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n", + " if not hasattr(array, \"sparse\") and array.dtypes.apply(is_sparse).any():\n", + "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:605: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n", + " if is_sparse(pd_dtype):\n", + "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:614: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n", + " if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):\n", + "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:767: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n", + " if not hasattr(array, \"sparse\") and array.dtypes.apply(is_sparse).any():\n", + "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:605: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n", + " if is_sparse(pd_dtype):\n", + "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:614: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n", + " if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):\n" + ] + }, { "name": "stdout", "output_type": "stream", @@ -10336,18 +10524,6 @@ "name": "stderr", "output_type": "stream", "text": [ - "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:767: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n", - " if not hasattr(array, \"sparse\") and array.dtypes.apply(is_sparse).any():\n", - "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:605: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n", - " if is_sparse(pd_dtype):\n", - "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:614: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n", - " if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):\n", - "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:767: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n", - " if not hasattr(array, \"sparse\") and array.dtypes.apply(is_sparse).any():\n", - "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:605: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n", - " if is_sparse(pd_dtype):\n", - "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:614: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n", - " if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):\n", "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/neural_network/_multilayer_perceptron.py:686: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet.\n", " warnings.warn(\n", "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:767: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n", @@ -10479,7 +10655,7 @@ }, { "cell_type": "code", - "execution_count": 145, + "execution_count": 250, "metadata": {}, "outputs": [], "source": [ @@ -10495,9 +10671,27 @@ }, { "cell_type": "code", - "execution_count": 146, + "execution_count": 251, "metadata": {}, "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:767: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n", + " if not hasattr(array, \"sparse\") and array.dtypes.apply(is_sparse).any():\n", + "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:605: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n", + " if is_sparse(pd_dtype):\n", + "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:614: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n", + " if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):\n", + "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:767: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n", + " if not hasattr(array, \"sparse\") and array.dtypes.apply(is_sparse).any():\n", + "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:605: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n", + " if is_sparse(pd_dtype):\n", + "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:614: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n", + " if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):\n" + ] + }, { "name": "stdout", "output_type": "stream", @@ -10534,18 +10728,6 @@ "name": "stderr", "output_type": "stream", "text": [ - "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:767: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n", - " if not hasattr(array, \"sparse\") and array.dtypes.apply(is_sparse).any():\n", - "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:605: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n", - " if is_sparse(pd_dtype):\n", - "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:614: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n", - " if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):\n", - "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:767: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n", - " if not hasattr(array, \"sparse\") and array.dtypes.apply(is_sparse).any():\n", - "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:605: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n", - " if is_sparse(pd_dtype):\n", - "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:614: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n", - " if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):\n", "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/neural_network/_multilayer_perceptron.py:686: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet.\n", " warnings.warn(\n", "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:767: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n", @@ -10671,7 +10853,7 @@ }, { "cell_type": "code", - "execution_count": 147, + "execution_count": 252, "metadata": {}, "outputs": [ { @@ -19051,6 +19233,235 @@ "print(f\"Average Recall: {average_recall}\")\n", "print(f\"Average F1 Score: {average_f1_score}\")\n" ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### MLP classifier all variables" + ] + }, + { + "cell_type": "code", + "execution_count": 253, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Index(['Anzahl der Einwohner: innen',\n", + " 'Anzahl der Kinder und Jugendlichen unter 18 Jahren',\n", + " 'Anzahl älterer Einwohner: innen über 64 Jahren',\n", + " 'Anzahl der Haushalte', 'Fläche in km²', 'Einwohner: innen je km²',\n", + " 'Anzahl der Wohngebäude', 'Anzahl der Wohnungen',\n", + " 'Durch-schnittliche Wohnungs-größe in m²',\n", + " 'Anzahl der Sozial-wohnungen',\n", + " 'Anteil der Sozial-wohnungen an allen Wohnungen',\n", + " 'Anzahl der Wohnungen in Ein- und Zweifamilien-häusern',\n", + " 'Anteil der Wohnungen in Ein- und Zweifamilien-häusern an allen Wohnungen',\n", + " 'Anzahl der Einpersonen-haushalte',\n", + " 'Anteil der Haushalte, in denen nur eine Person lebt, an allen Haushalten',\n", + " 'Gesamtbetrag der Einkünfte - [Steuerpflichtig]',\n", + " 'Gesamtbetrag der Einkünfte - [1000€]',\n", + " 'Festgesetzte Einkommenssteuer/ Jahreslohnsteuer - [1000€]',\n", + " 'Gesamtbetrag Einkünfte Mittelwert - [€]',\n", + " 'Gesamtbetrag Einkünfte Median - [€]', 'greencrocers_count',\n", + " 'supermarkets_count', 'biosupermarkets_count', 'all_restaurants_count',\n", + " 'organic_restaurants_count', 'vegan_restaurants_count',\n", + " 'distance_rathaus'],\n", + " dtype='object')" + ] + }, + "execution_count": 253, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "y3 = data_df[[\"market_yes_no\"]]\n", + "\n", + "X3 = data2_df.drop(['Anteil Kinder und Jugendlicher unter 18 Jahren an der Gesamt-bevölkerung',\n", + " 'Anteil älterer Einwohner: innen über 64 Jahren an der Gesamt-bevölkerung','market_count', 'Durch-schnittliche Wohnfläche je Einwohner:in in m²',\n", + " 'Durch-schnittliche Anzahl der Personen je Haushalt', 'art_score', 'farms_count'], axis=1)\n", + "X3 = X3.dropna(axis=1)\n", + "X3.columns\n" + ] + }, + { + "cell_type": "code", + "execution_count": 254, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Training Set Accuracy: 1.0\n", + "Test Set Accuracy: 0.7\n", + "\n", + "Training Set Classification Report:\n", + " precision recall f1-score support\n", + "\n", + " 0 1.00 1.00 1.00 36\n", + " 1 1.00 1.00 1.00 43\n", + "\n", + " accuracy 1.00 79\n", + " macro avg 1.00 1.00 1.00 79\n", + "weighted avg 1.00 1.00 1.00 79\n", + "\n", + "\n", + "Test Set Classification Report:\n", + " precision recall f1-score support\n", + "\n", + " 0 0.71 0.56 0.63 9\n", + " 1 0.69 0.82 0.75 11\n", + "\n", + " accuracy 0.70 20\n", + " macro avg 0.70 0.69 0.69 20\n", + "weighted avg 0.70 0.70 0.69 20\n", + "\n", + "\n", + "Model Parameters: {'memory': None, 'steps': [('scaler', StandardScaler()), ('MLP', MLPClassifier(max_iter=300, random_state=42))], 'verbose': False, 'scaler': StandardScaler(), 'MLP': MLPClassifier(max_iter=300, random_state=42), 'scaler__copy': True, 'scaler__with_mean': True, 'scaler__with_std': True, 'MLP__activation': 'relu', 'MLP__alpha': 0.0001, 'MLP__batch_size': 'auto', 'MLP__beta_1': 0.9, 'MLP__beta_2': 0.999, 'MLP__early_stopping': False, 'MLP__epsilon': 1e-08, 'MLP__hidden_layer_sizes': (100,), 'MLP__learning_rate': 'constant', 'MLP__learning_rate_init': 0.001, 'MLP__max_fun': 15000, 'MLP__max_iter': 300, 'MLP__momentum': 0.9, 'MLP__n_iter_no_change': 10, 'MLP__nesterovs_momentum': True, 'MLP__power_t': 0.5, 'MLP__random_state': 42, 'MLP__shuffle': True, 'MLP__solver': 'adam', 'MLP__tol': 0.0001, 'MLP__validation_fraction': 0.1, 'MLP__verbose': False, 'MLP__warm_start': False}\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:767: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n", + " if not hasattr(array, \"sparse\") and array.dtypes.apply(is_sparse).any():\n", + "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:605: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n", + " if is_sparse(pd_dtype):\n", + "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:614: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n", + " if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):\n", + "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:767: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n", + " if not hasattr(array, \"sparse\") and array.dtypes.apply(is_sparse).any():\n", + "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:605: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n", + " if is_sparse(pd_dtype):\n", + "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:614: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n", + " if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):\n", + "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/neural_network/_multilayer_perceptron.py:686: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet.\n", + " warnings.warn(\n", + "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:767: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n", + " if not hasattr(array, \"sparse\") and array.dtypes.apply(is_sparse).any():\n", + "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:605: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n", + " if is_sparse(pd_dtype):\n", + "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:614: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n", + " if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):\n", + "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:767: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n", + " if not hasattr(array, \"sparse\") and array.dtypes.apply(is_sparse).any():\n", + "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:605: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n", + " if is_sparse(pd_dtype):\n", + "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:614: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n", + " if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):\n", + "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:767: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n", + " if not hasattr(array, \"sparse\") and array.dtypes.apply(is_sparse).any():\n", + "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:605: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n", + " if is_sparse(pd_dtype):\n", + "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:614: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n", + " if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):\n", + "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:767: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n", + " if not hasattr(array, \"sparse\") and array.dtypes.apply(is_sparse).any():\n", + "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:605: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n", + " if is_sparse(pd_dtype):\n", + "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:614: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n", + " if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):\n", + "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:767: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n", + " if not hasattr(array, \"sparse\") and array.dtypes.apply(is_sparse).any():\n", + "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:605: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n", + " if is_sparse(pd_dtype):\n", + "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:614: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n", + " if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):\n", + "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:767: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n", + " if not hasattr(array, \"sparse\") and array.dtypes.apply(is_sparse).any():\n", + "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:605: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n", + " if is_sparse(pd_dtype):\n", + "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:614: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n", + " if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):\n", + "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:767: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n", + " if not hasattr(array, \"sparse\") and array.dtypes.apply(is_sparse).any():\n", + "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:605: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n", + " if is_sparse(pd_dtype):\n", + "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:614: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n", + " if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):\n", + "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:767: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n", + " if not hasattr(array, \"sparse\") and array.dtypes.apply(is_sparse).any():\n", + "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:605: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n", + " if is_sparse(pd_dtype):\n", + "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:614: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n", + " if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):\n", + "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:767: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n", + " if not hasattr(array, \"sparse\") and array.dtypes.apply(is_sparse).any():\n", + "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:605: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n", + " if is_sparse(pd_dtype):\n", + "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:614: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n", + " if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):\n", + "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:767: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n", + " if not hasattr(array, \"sparse\") and array.dtypes.apply(is_sparse).any():\n", + "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:605: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n", + " if is_sparse(pd_dtype):\n", + "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:614: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n", + " if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):\n", + "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:767: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n", + " if not hasattr(array, \"sparse\") and array.dtypes.apply(is_sparse).any():\n", + "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:605: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n", + " if is_sparse(pd_dtype):\n", + "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:614: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n", + " if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):\n", + "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:767: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n", + " if not hasattr(array, \"sparse\") and array.dtypes.apply(is_sparse).any():\n", + "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:605: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n", + " if is_sparse(pd_dtype):\n", + "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:614: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n", + " if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):\n", + "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:767: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n", + " if not hasattr(array, \"sparse\") and array.dtypes.apply(is_sparse).any():\n", + "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:605: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n", + " if is_sparse(pd_dtype):\n", + "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:614: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n", + " if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):\n", + "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:767: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n", + " if not hasattr(array, \"sparse\") and array.dtypes.apply(is_sparse).any():\n", + "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:605: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n", + " if is_sparse(pd_dtype):\n", + "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:614: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n", + " if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):\n" + ] + } + ], + "source": [ + "\n", + "# Splitting data into training and test sets\n", + "X_train, X_test, y_train, y_test = train_test_split(X3, y3, test_size=0.2, random_state=42)\n", + "\n", + "# Setting up the pipeline with MLPClassifier\n", + "pipeline = Pipeline([\n", + " ('scaler', StandardScaler()), # Feature scaling is especially important for neural networks\n", + " ('MLP', MLPClassifier(hidden_layer_sizes=(100,), activation='relu', solver='adam', max_iter=300, random_state=42))\n", + "])\n", + "\n", + "# Fitting the pipeline to the training data\n", + "pipeline.fit(X_train, y_train.values.ravel())\n", + "\n", + "# Making predictions on both the training and test sets\n", + "y_pred_train = pipeline.predict(X_train)\n", + "y_pred_test = pipeline.predict(X_test)\n", + "\n", + "# Calculating and printing accuracy for both sets\n", + "accuracy_train = accuracy_score(y_train, y_pred_train)\n", + "print(\"Training Set Accuracy:\", accuracy_train)\n", + "\n", + "accuracy_test = accuracy_score(y_test, y_pred_test)\n", + "print(\"Test Set Accuracy:\", accuracy_test)\n", + "\n", + "# Generating and printing classification reports for both sets\n", + "print(\"\\nTraining Set Classification Report:\\n\", classification_report(y_train, y_pred_train))\n", + "print(\"\\nTest Set Classification Report:\\n\", classification_report(y_test, y_pred_test))\n", + "\n", + "# Getting and printing the pipeline's parameters\n", + "params = pipeline.get_params()\n", + "print(\"\\nModel Parameters:\", params)" + ] } ], "metadata": {