diff --git a/analysis.ipynb b/analysis.ipynb index c6c1a1c81d2d069964e6baf5a9b269e3358bbd90..827d82609222c898bb5fcd231d991be4695f1c08 100644 --- a/analysis.ipynb +++ b/analysis.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 208, + "execution_count": 255, "metadata": {}, "outputs": [], "source": [ @@ -41,7 +41,7 @@ }, { "cell_type": "code", - "execution_count": 209, + "execution_count": 256, "metadata": {}, "outputs": [ { @@ -1054,7 +1054,7 @@ "[99 rows x 39 columns]" ] }, - "execution_count": 209, + "execution_count": 256, "metadata": {}, "output_type": "execute_result" } @@ -1082,7 +1082,7 @@ }, { "cell_type": "code", - "execution_count": 210, + "execution_count": 257, "metadata": {}, "outputs": [], "source": [ @@ -1116,7 +1116,7 @@ }, { "cell_type": "code", - "execution_count": 211, + "execution_count": 258, "metadata": {}, "outputs": [ { @@ -1416,7 +1416,7 @@ "max 4.000000 7.208333 0.277313 " ] }, - "execution_count": 211, + "execution_count": 258, "metadata": {}, "output_type": "execute_result" } @@ -1427,7 +1427,7 @@ }, { "cell_type": "code", - "execution_count": 212, + "execution_count": 259, "metadata": {}, "outputs": [ { @@ -1499,7 +1499,7 @@ }, { "cell_type": "code", - "execution_count": 213, + "execution_count": 260, "metadata": {}, "outputs": [ { @@ -1580,7 +1580,7 @@ }, { "cell_type": "code", - "execution_count": 214, + "execution_count": 261, "metadata": {}, "outputs": [ { @@ -1655,7 +1655,7 @@ }, { "cell_type": "code", - "execution_count": 215, + "execution_count": 262, "metadata": {}, "outputs": [ { @@ -1993,7 +1993,7 @@ }, { "cell_type": "code", - "execution_count": 216, + "execution_count": 263, "metadata": {}, "outputs": [ { @@ -2012,7 +2012,7 @@ "Text(0.5, 1.0, 'Density Plot of Art_Score ')" ] }, - "execution_count": 216, + "execution_count": 263, "metadata": {}, "output_type": "execute_result" }, @@ -2037,7 +2037,7 @@ }, { "cell_type": "code", - "execution_count": 217, + "execution_count": 264, "metadata": {}, "outputs": [ { @@ -2071,7 +2071,7 @@ }, { "cell_type": "code", - "execution_count": 218, + "execution_count": 265, "metadata": {}, "outputs": [ { @@ -2105,7 +2105,7 @@ }, { "cell_type": "code", - "execution_count": 219, + "execution_count": 266, "metadata": {}, "outputs": [ { @@ -2153,7 +2153,7 @@ }, { "cell_type": "code", - "execution_count": 220, + "execution_count": 267, "metadata": {}, "outputs": [ { @@ -2227,7 +2227,7 @@ }, { "cell_type": "code", - "execution_count": 221, + "execution_count": 268, "metadata": {}, "outputs": [ { @@ -2245,13 +2245,7 @@ "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:605: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n", " if is_sparse(pd_dtype):\n", "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:614: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n", - " if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ + " if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):\n", "/var/folders/lj/q1ndb2493275c2y0vzyplnk00000gn/T/ipykernel_25655/2287162676.py:19: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame.\n", "Try using .loc[row_indexer,col_indexer] = value instead\n", @@ -2299,7 +2293,7 @@ }, { "cell_type": "code", - "execution_count": 222, + "execution_count": 269, "metadata": {}, "outputs": [ { @@ -2340,7 +2334,7 @@ }, { "cell_type": "code", - "execution_count": 223, + "execution_count": 270, "metadata": {}, "outputs": [ { @@ -2435,12 +2429,12 @@ " <td>4</td>\n", " <td>2.565476</td>\n", " <td>0.000000</td>\n", + " <td>3</td>\n", " <td>1</td>\n", - " <td>0</td>\n", " <td>2</td>\n", " <td>0</td>\n", - " <td>4</td>\n", - " <td>0</td>\n", + " <td>3</td>\n", + " <td>1</td>\n", " </tr>\n", " <tr>\n", " <th>HafenCity</th>\n", @@ -2461,12 +2455,12 @@ " <td>1</td>\n", " <td>1.952381</td>\n", " <td>0.005758</td>\n", + " <td>3</td>\n", " <td>1</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", " <td>4</td>\n", " <td>0</td>\n", + " <td>3</td>\n", + " <td>1</td>\n", " </tr>\n", " <tr>\n", " <th>Neustadt</th>\n", @@ -2487,12 +2481,12 @@ " <td>4</td>\n", " <td>2.702381</td>\n", " <td>0.001174</td>\n", + " <td>3</td>\n", " <td>1</td>\n", - " <td>0</td>\n", " <td>2</td>\n", " <td>0</td>\n", - " <td>4</td>\n", - " <td>0</td>\n", + " <td>3</td>\n", + " <td>1</td>\n", " </tr>\n", " <tr>\n", " <th>St. Pauli</th>\n", @@ -2513,12 +2507,12 @@ " <td>2</td>\n", " <td>1.851190</td>\n", " <td>0.009631</td>\n", + " <td>3</td>\n", " <td>1</td>\n", - " <td>0</td>\n", " <td>2</td>\n", " <td>0</td>\n", - " <td>4</td>\n", - " <td>0</td>\n", + " <td>3</td>\n", + " <td>1</td>\n", " </tr>\n", " <tr>\n", " <th>St. Georg</th>\n", @@ -2539,12 +2533,12 @@ " <td>4</td>\n", " <td>2.523810</td>\n", " <td>0.008493</td>\n", + " <td>3</td>\n", " <td>1</td>\n", - " <td>0</td>\n", " <td>2</td>\n", " <td>0</td>\n", - " <td>4</td>\n", - " <td>0</td>\n", + " <td>3</td>\n", + " <td>1</td>\n", " </tr>\n", " <tr>\n", " <th>...</th>\n", @@ -2591,12 +2585,12 @@ " <td>0</td>\n", " <td>0.422619</td>\n", " <td>0.104116</td>\n", + " <td>2</td>\n", " <td>0</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", " <td>0</td>\n", - " <td>3</td>\n", - " <td>1</td>\n", + " <td>0</td>\n", + " <td>2</td>\n", + " <td>0</td>\n", " </tr>\n", " <tr>\n", " <th>Neugraben-Fischbek</th>\n", @@ -2617,12 +2611,12 @@ " <td>0</td>\n", " <td>1.857143</td>\n", " <td>0.130211</td>\n", + " <td>2</td>\n", " <td>0</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", " <td>0</td>\n", - " <td>3</td>\n", - " <td>1</td>\n", + " <td>0</td>\n", + " <td>2</td>\n", + " <td>0</td>\n", " </tr>\n", " <tr>\n", " <th>Francop</th>\n", @@ -2643,12 +2637,12 @@ " <td>0</td>\n", " <td>0.000000</td>\n", " <td>0.114626</td>\n", - " <td>2</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", " <td>0</td>\n", " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", " </tr>\n", " <tr>\n", " <th>Neuenfelde</th>\n", @@ -2669,12 +2663,12 @@ " <td>0</td>\n", " <td>0.375000</td>\n", " <td>0.162791</td>\n", + " <td>2</td>\n", " <td>0</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", " <td>0</td>\n", - " <td>3</td>\n", - " <td>1</td>\n", + " <td>0</td>\n", + " <td>2</td>\n", + " <td>0</td>\n", " </tr>\n", " <tr>\n", " <th>Cranz</th>\n", @@ -2695,12 +2689,12 @@ " <td>0</td>\n", " <td>0.000000</td>\n", " <td>0.200686</td>\n", - " <td>2</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", " <td>0</td>\n", " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", @@ -2864,36 +2858,36 @@ "\n", " KMeans_5 KMeans_2 Spectral_5 Spectral_2 Agglo_5 \\\n", "Stadtteil \n", - "Hamburg-Altstadt 1 0 2 0 4 \n", - "HafenCity 1 0 1 0 4 \n", - "Neustadt 1 0 2 0 4 \n", - "St. Pauli 1 0 2 0 4 \n", - "St. Georg 1 0 2 0 4 \n", + "Hamburg-Altstadt 3 1 2 0 3 \n", + "HafenCity 3 1 4 0 3 \n", + "Neustadt 3 1 2 0 3 \n", + "St. Pauli 3 1 2 0 3 \n", + "St. Georg 3 1 2 0 3 \n", "... ... ... ... ... ... \n", - "Hausbruch 0 1 1 0 3 \n", - "Neugraben-Fischbek 0 1 1 0 3 \n", - "Francop 2 1 1 1 0 \n", - "Neuenfelde 0 1 1 0 3 \n", - "Cranz 2 1 1 1 0 \n", + "Hausbruch 2 0 0 0 2 \n", + "Neugraben-Fischbek 2 0 0 0 2 \n", + "Francop 0 0 0 1 0 \n", + "Neuenfelde 2 0 0 0 2 \n", + "Cranz 0 0 0 1 0 \n", "\n", " Agglo_2 \n", "Stadtteil \n", - "Hamburg-Altstadt 0 \n", - "HafenCity 0 \n", - "Neustadt 0 \n", - "St. Pauli 0 \n", - "St. Georg 0 \n", + "Hamburg-Altstadt 1 \n", + "HafenCity 1 \n", + "Neustadt 1 \n", + "St. Pauli 1 \n", + "St. Georg 1 \n", "... ... \n", - "Hausbruch 1 \n", - "Neugraben-Fischbek 1 \n", - "Francop 1 \n", - "Neuenfelde 1 \n", - "Cranz 1 \n", + "Hausbruch 0 \n", + "Neugraben-Fischbek 0 \n", + "Francop 0 \n", + "Neuenfelde 0 \n", + "Cranz 0 \n", "\n", "[99 rows x 23 columns]" ] }, - "execution_count": 223, + "execution_count": 270, "metadata": {}, "output_type": "execute_result" } @@ -2904,7 +2898,7 @@ }, { "cell_type": "code", - "execution_count": 224, + "execution_count": 271, "metadata": {}, "outputs": [], "source": [ @@ -2936,7 +2930,7 @@ }, { "cell_type": "code", - "execution_count": 225, + "execution_count": 272, "metadata": {}, "outputs": [ { @@ -2953,7 +2947,7 @@ " dtype='object')" ] }, - "execution_count": 225, + "execution_count": 272, "metadata": {}, "output_type": "execute_result" } @@ -2979,7 +2973,7 @@ }, { "cell_type": "code", - "execution_count": 226, + "execution_count": 273, "metadata": {}, "outputs": [ { @@ -3076,7 +3070,7 @@ }, { "cell_type": "code", - "execution_count": 227, + "execution_count": 274, "metadata": {}, "outputs": [ { @@ -3125,7 +3119,7 @@ }, { "cell_type": "code", - "execution_count": 228, + "execution_count": 275, "metadata": {}, "outputs": [ { @@ -3149,18 +3143,6 @@ "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:605: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n", " if is_sparse(pd_dtype):\n", "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:614: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n", - " if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):\n", - "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:767: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n", - " if not hasattr(array, \"sparse\") and array.dtypes.apply(is_sparse).any():\n", - "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:605: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n", - " if is_sparse(pd_dtype):\n", - "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:614: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n", - " if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):\n", - "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:767: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n", - " if not hasattr(array, \"sparse\") and array.dtypes.apply(is_sparse).any():\n", - "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:605: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n", - " if is_sparse(pd_dtype):\n", - "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:614: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n", " if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):\n" ] }, @@ -3169,21 +3151,21 @@ "output_type": "stream", "text": [ "Test Data Evaluation:\n", - "R² Score: -0.4464530491116274\n", - "Mean Absolute Error (MAE): 1.0651785714285713\n", - "Mean Squared Error (MSE): 1.9987404336734695\n", - "Root Mean Squared Error (RMSE): 1.413768168291205\n", + "R² Score: -0.5726397634494289\n", + "Mean Absolute Error (MAE): 1.1434523809523809\n", + "Mean Squared Error (MSE): 2.173107993197279\n", + "Root Mean Squared Error (RMSE): 1.4741465304362653\n", " Coefficient\n", - "Anteil Kinder und Jugendlicher unter 18 Jahren ... 0.091625\n", - "Anteil älterer Einwohner: innen über 64 Jahren ... 0.066265\n", - "Einwohner: innen je km² 0.310324\n", - "Gesamtbetrag Einkünfte Median - [€] 0.023681\n", - "Anteil der Sozial-wohnungen an allen Wohnungen 0.030918\n", - "Durch-schnittliche Wohnfläche je Einwohner:in i... 0.126750\n", - "Durch-schnittliche Anzahl der Personen je Haushalt 0.041658\n", - "organic_restaurants_count 0.000007\n", + "Anteil Kinder und Jugendlicher unter 18 Jahren ... 0.131468\n", + "Anteil älterer Einwohner: innen über 64 Jahren ... 0.001418\n", + "Einwohner: innen je km² 0.308173\n", + "Gesamtbetrag Einkünfte Median - [€] 0.127030\n", + "Anteil der Sozial-wohnungen an allen Wohnungen 0.029023\n", + "Durch-schnittliche Wohnfläche je Einwohner:in i... 0.025656\n", + "Durch-schnittliche Anzahl der Personen je Haushalt 0.004799\n", + "organic_restaurants_count 0.001845\n", "vegan_restaurants_count 0.243526\n", - "distance_rathaus 0.065245\n" + "distance_rathaus 0.127063\n" ] }, { @@ -3207,6 +3189,18 @@ "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:605: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n", " if is_sparse(pd_dtype):\n", "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:614: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n", + " if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):\n", + "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:767: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n", + " if not hasattr(array, \"sparse\") and array.dtypes.apply(is_sparse).any():\n", + "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:605: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n", + " if is_sparse(pd_dtype):\n", + "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:614: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n", + " if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):\n", + "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:767: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n", + " if not hasattr(array, \"sparse\") and array.dtypes.apply(is_sparse).any():\n", + "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:605: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n", + " if is_sparse(pd_dtype):\n", + "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:614: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n", " if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):\n" ] } @@ -3260,7 +3254,7 @@ }, { "cell_type": "code", - "execution_count": 229, + "execution_count": 276, "metadata": {}, "outputs": [ { @@ -3363,7 +3357,7 @@ }, { "cell_type": "code", - "execution_count": 230, + "execution_count": 277, "metadata": {}, "outputs": [ { @@ -3470,7 +3464,7 @@ }, { "cell_type": "code", - "execution_count": 231, + "execution_count": 278, "metadata": {}, "outputs": [ { @@ -3487,7 +3481,7 @@ " dtype='object')" ] }, - "execution_count": 231, + "execution_count": 278, "metadata": {}, "output_type": "execute_result" } @@ -3503,7 +3497,7 @@ }, { "cell_type": "code", - "execution_count": 232, + "execution_count": 279, "metadata": {}, "outputs": [ { @@ -3907,13 +3901,7 @@ "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:614: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n", " if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):\n", "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/neural_network/_multilayer_perceptron.py:1623: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n", - " y = column_or_1d(y, warn=True)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ + " y = column_or_1d(y, warn=True)\n", "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/neural_network/_multilayer_perceptron.py:686: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (10) reached and the optimization hasn't converged yet.\n", " warnings.warn(\n", "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:767: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n", @@ -4237,7 +4225,13 @@ "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:614: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n", " if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):\n", "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/neural_network/_multilayer_perceptron.py:1623: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n", - " y = column_or_1d(y, warn=True)\n", + " y = column_or_1d(y, warn=True)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/neural_network/_multilayer_perceptron.py:686: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (10) reached and the optimization hasn't converged yet.\n", " warnings.warn(\n", "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:767: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n", @@ -8909,7 +8903,7 @@ }, { "cell_type": "code", - "execution_count": 233, + "execution_count": 280, "metadata": {}, "outputs": [ { @@ -9003,7 +8997,7 @@ "max 36.786614 6.065197 6.065197" ] }, - "execution_count": 233, + "execution_count": 280, "metadata": {}, "output_type": "execute_result" } @@ -9024,7 +9018,7 @@ }, { "cell_type": "code", - "execution_count": 234, + "execution_count": 281, "metadata": {}, "outputs": [ { @@ -9055,7 +9049,7 @@ " dtype='object')" ] }, - "execution_count": 234, + "execution_count": 281, "metadata": {}, "output_type": "execute_result" } @@ -9070,7 +9064,7 @@ }, { "cell_type": "code", - "execution_count": 235, + "execution_count": 282, "metadata": {}, "outputs": [ { @@ -9177,7 +9171,7 @@ }, { "cell_type": "code", - "execution_count": 236, + "execution_count": 283, "metadata": {}, "outputs": [ { @@ -9191,7 +9185,7 @@ " dtype='object')" ] }, - "execution_count": 236, + "execution_count": 283, "metadata": {}, "output_type": "execute_result" } @@ -9221,40 +9215,36 @@ }, { "cell_type": "code", - "execution_count": 237, + "execution_count": 304, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Training Set Accuracy: 0.8227848101265823\n", - "Test Set Accuracy: 0.55\n", + "Training Set Accuracy: 0.8734177215189873\n", + "Test Set Accuracy: 0.85\n", "\n", "Training Set Classification Report:\n", " precision recall f1-score support\n", "\n", - " 0 0.84 0.89 0.86 36\n", - " 1 0.77 0.82 0.79 33\n", - " 2 1.00 0.80 0.89 5\n", - " 3 1.00 0.25 0.40 4\n", - " 4 1.00 1.00 1.00 1\n", + " 0 0.86 0.86 0.86 36\n", + " 1 0.88 0.88 0.88 43\n", "\n", - " accuracy 0.82 79\n", - " macro avg 0.92 0.75 0.79 79\n", - "weighted avg 0.83 0.82 0.82 79\n", + " accuracy 0.87 79\n", + " macro avg 0.87 0.87 0.87 79\n", + "weighted avg 0.87 0.87 0.87 79\n", "\n", "\n", "Test Set Classification Report:\n", " precision recall f1-score support\n", "\n", - " 0 0.70 0.78 0.74 9\n", - " 1 0.40 0.57 0.47 7\n", - " 2 0.00 0.00 0.00 4\n", + " 0 0.88 0.78 0.82 9\n", + " 1 0.83 0.91 0.87 11\n", "\n", - " accuracy 0.55 20\n", - " macro avg 0.37 0.45 0.40 20\n", - "weighted avg 0.45 0.55 0.50 20\n", + " accuracy 0.85 20\n", + " macro avg 0.85 0.84 0.85 20\n", + "weighted avg 0.85 0.85 0.85 20\n", "\n", "\n", "Model Parameters: {'memory': None, 'steps': [('scaler', StandardScaler()), ('LogM', LogisticRegression())], 'verbose': False, 'scaler': StandardScaler(), 'LogM': LogisticRegression(), 'scaler__copy': True, 'scaler__with_mean': True, 'scaler__with_std': True, 'LogM__C': 1.0, 'LogM__class_weight': None, 'LogM__dual': False, 'LogM__fit_intercept': True, 'LogM__intercept_scaling': 1, 'LogM__l1_ratio': None, 'LogM__max_iter': 100, 'LogM__multi_class': 'auto', 'LogM__n_jobs': None, 'LogM__penalty': 'l2', 'LogM__random_state': None, 'LogM__solver': 'lbfgs', 'LogM__tol': 0.0001, 'LogM__verbose': 0, 'LogM__warm_start': False}\n" @@ -9379,14 +9369,21 @@ "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:605: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n", " if is_sparse(pd_dtype):\n", "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:614: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n", - " if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):\n", - "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/metrics/_classification.py:1344: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n", - " _warn_prf(average, modifier, msg_start, len(result))\n", - "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/metrics/_classification.py:1344: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n", - " _warn_prf(average, modifier, msg_start, len(result))\n", - "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/metrics/_classification.py:1344: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n", - " _warn_prf(average, modifier, msg_start, len(result))\n" + " if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):\n" ] + }, + { + "data": { + "text/plain": [ + "array([0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1,\n", + " 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1,\n", + " 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1,\n", + " 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1])" + ] + }, + "execution_count": 304, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ @@ -9428,7 +9425,53 @@ }, { "cell_type": "code", - "execution_count": 238, + "execution_count": 314, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/var/folders/lj/q1ndb2493275c2y0vzyplnk00000gn/T/ipykernel_25655/4287676306.py:4: SettingWithCopyWarning: \n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", + " data_df['y_pred'] = pd.concat([y_pred_train_series, y_pred_test_series])\n", + "/var/folders/lj/q1ndb2493275c2y0vzyplnk00000gn/T/ipykernel_25655/4287676306.py:5: SettingWithCopyWarning: \n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", + " data_df['residuals'] = data_df['market_count'] - data_df['y_pred']\n" + ] + }, + { + "data": { + "text/plain": [ + "Index(['Altona-Nord', 'Bahrenfeld', 'Othmarschen', 'Marienthal',\n", + " 'Hummelsbüttel', 'Eißendorf', 'Heimfeld'],\n", + " dtype='object', name='Stadtteil')" + ] + }, + "execution_count": 314, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "y_pred_train_series = pd.Series(y_pred_train, index=y_train.index)\n", + "y_pred_test_series = pd.Series(y_pred_test, index=y_test.index)\n", + "\n", + "data_df['y_pred'] = pd.concat([y_pred_train_series, y_pred_test_series])\n", + "data_df['residuals'] = data_df['market_count'] - data_df['y_pred']\n", + "\n", + "data_df[data_df['residuals'] < 0].index" + ] + }, + { + "cell_type": "code", + "execution_count": 285, "metadata": {}, "outputs": [ { @@ -9501,7 +9544,7 @@ }, { "cell_type": "code", - "execution_count": 239, + "execution_count": 286, "metadata": {}, "outputs": [ { @@ -9578,7 +9621,7 @@ }, { "cell_type": "code", - "execution_count": 240, + "execution_count": 287, "metadata": {}, "outputs": [ { @@ -9587,15 +9630,15 @@ "text": [ " precision recall f1-score support\n", "\n", - " 0 0.59 0.22 0.32 45\n", - " 1 0.35 0.23 0.27 40\n", - " 2 0.05 0.11 0.06 9\n", - " 3 0.11 0.50 0.18 4\n", + " 0 0.40 0.18 0.25 45\n", + " 1 0.36 0.20 0.26 40\n", + " 2 0.00 0.00 0.00 9\n", + " 3 0.05 0.25 0.08 4\n", " 4 0.00 0.00 0.00 1\n", "\n", - " accuracy 0.22 99\n", - " macro avg 0.22 0.21 0.17 99\n", - "weighted avg 0.42 0.22 0.27 99\n", + " accuracy 0.17 99\n", + " macro avg 0.16 0.13 0.12 99\n", + "weighted avg 0.33 0.17 0.22 99\n", "\n" ] }, @@ -9637,7 +9680,7 @@ }, { "cell_type": "code", - "execution_count": 241, + "execution_count": 288, "metadata": {}, "outputs": [ { @@ -9663,7 +9706,7 @@ }, { "cell_type": "code", - "execution_count": 242, + "execution_count": 289, "metadata": {}, "outputs": [ { @@ -9880,7 +9923,7 @@ }, { "cell_type": "code", - "execution_count": 243, + "execution_count": 290, "metadata": {}, "outputs": [ { @@ -9934,7 +9977,7 @@ }, { "cell_type": "code", - "execution_count": 244, + "execution_count": 291, "metadata": {}, "outputs": [ { @@ -9948,7 +9991,7 @@ " dtype='object')" ] }, - "execution_count": 244, + "execution_count": 291, "metadata": {}, "output_type": "execute_result" } @@ -9980,7 +10023,7 @@ }, { "cell_type": "code", - "execution_count": 245, + "execution_count": 292, "metadata": {}, "outputs": [ { @@ -9988,7 +10031,7 @@ "output_type": "stream", "text": [ "Training Set Accuracy: 1.0\n", - "Test Set Accuracy: 0.5\n", + "Test Set Accuracy: 0.55\n", "\n", "Training Set Classification Report:\n", " precision recall f1-score support\n", @@ -10007,14 +10050,15 @@ "Test Set Classification Report:\n", " precision recall f1-score support\n", "\n", - " 0 0.78 0.78 0.78 9\n", - " 1 0.38 0.43 0.40 7\n", + " 0 0.88 0.78 0.82 9\n", + " 1 0.50 0.57 0.53 7\n", " 2 0.00 0.00 0.00 4\n", + " 3 0.00 0.00 0.00 0\n", " 4 0.00 0.00 0.00 0\n", "\n", - " accuracy 0.50 20\n", - " macro avg 0.29 0.30 0.29 20\n", - "weighted avg 0.48 0.50 0.49 20\n", + " accuracy 0.55 20\n", + " macro avg 0.28 0.27 0.27 20\n", + "weighted avg 0.57 0.55 0.56 20\n", "\n", "\n", "Model Parameters: {'memory': None, 'steps': [('scaler', StandardScaler()), ('DTC', DecisionTreeClassifier())], 'verbose': False, 'scaler': StandardScaler(), 'DTC': DecisionTreeClassifier(), 'scaler__copy': True, 'scaler__with_mean': True, 'scaler__with_std': True, 'DTC__ccp_alpha': 0.0, 'DTC__class_weight': None, 'DTC__criterion': 'gini', 'DTC__max_depth': None, 'DTC__max_features': None, 'DTC__max_leaf_nodes': None, 'DTC__min_impurity_decrease': 0.0, 'DTC__min_samples_leaf': 1, 'DTC__min_samples_split': 2, 'DTC__min_weight_fraction_leaf': 0.0, 'DTC__random_state': None, 'DTC__splitter': 'best'}\n" @@ -10184,7 +10228,7 @@ }, { "cell_type": "code", - "execution_count": 246, + "execution_count": 293, "metadata": {}, "outputs": [ { @@ -10198,7 +10242,7 @@ " dtype='object')" ] }, - "execution_count": 246, + "execution_count": 293, "metadata": {}, "output_type": "execute_result" } @@ -10230,41 +10274,9 @@ }, { "cell_type": "code", - "execution_count": 247, + "execution_count": 294, "metadata": {}, "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Training Set Accuracy: 1.0\n", - "Test Set Accuracy: 0.85\n", - "\n", - "Training Set Classification Report:\n", - " precision recall f1-score support\n", - "\n", - " 0 1.00 1.00 1.00 36\n", - " 1 1.00 1.00 1.00 43\n", - "\n", - " accuracy 1.00 79\n", - " macro avg 1.00 1.00 1.00 79\n", - "weighted avg 1.00 1.00 1.00 79\n", - "\n", - "\n", - "Test Set Classification Report:\n", - " precision recall f1-score support\n", - "\n", - " 0 0.88 0.78 0.82 9\n", - " 1 0.83 0.91 0.87 11\n", - "\n", - " accuracy 0.85 20\n", - " macro avg 0.85 0.84 0.85 20\n", - "weighted avg 0.85 0.85 0.85 20\n", - "\n", - "\n", - "Model Parameters: {'memory': None, 'steps': [('scaler', StandardScaler()), ('DTC', DecisionTreeClassifier())], 'verbose': False, 'scaler': StandardScaler(), 'DTC': DecisionTreeClassifier(), 'scaler__copy': True, 'scaler__with_mean': True, 'scaler__with_std': True, 'DTC__ccp_alpha': 0.0, 'DTC__class_weight': None, 'DTC__criterion': 'gini', 'DTC__max_depth': None, 'DTC__max_features': None, 'DTC__max_leaf_nodes': None, 'DTC__min_impurity_decrease': 0.0, 'DTC__min_samples_leaf': 1, 'DTC__min_samples_split': 2, 'DTC__min_weight_fraction_leaf': 0.0, 'DTC__random_state': None, 'DTC__splitter': 'best'}\n" - ] - }, { "name": "stderr", "output_type": "stream", @@ -10384,6 +10396,38 @@ "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:614: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n", " if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):\n" ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Training Set Accuracy: 1.0\n", + "Test Set Accuracy: 0.8\n", + "\n", + "Training Set Classification Report:\n", + " precision recall f1-score support\n", + "\n", + " 0 1.00 1.00 1.00 36\n", + " 1 1.00 1.00 1.00 43\n", + "\n", + " accuracy 1.00 79\n", + " macro avg 1.00 1.00 1.00 79\n", + "weighted avg 1.00 1.00 1.00 79\n", + "\n", + "\n", + "Test Set Classification Report:\n", + " precision recall f1-score support\n", + "\n", + " 0 0.78 0.78 0.78 9\n", + " 1 0.82 0.82 0.82 11\n", + "\n", + " accuracy 0.80 20\n", + " macro avg 0.80 0.80 0.80 20\n", + "weighted avg 0.80 0.80 0.80 20\n", + "\n", + "\n", + "Model Parameters: {'memory': None, 'steps': [('scaler', StandardScaler()), ('DTC', DecisionTreeClassifier())], 'verbose': False, 'scaler': StandardScaler(), 'DTC': DecisionTreeClassifier(), 'scaler__copy': True, 'scaler__with_mean': True, 'scaler__with_std': True, 'DTC__ccp_alpha': 0.0, 'DTC__class_weight': None, 'DTC__criterion': 'gini', 'DTC__max_depth': None, 'DTC__max_features': None, 'DTC__max_leaf_nodes': None, 'DTC__min_impurity_decrease': 0.0, 'DTC__min_samples_leaf': 1, 'DTC__min_samples_split': 2, 'DTC__min_weight_fraction_leaf': 0.0, 'DTC__random_state': None, 'DTC__splitter': 'best'}\n" + ] } ], "source": [ @@ -10430,7 +10474,7 @@ }, { "cell_type": "code", - "execution_count": 248, + "execution_count": 295, "metadata": {}, "outputs": [ { @@ -10444,7 +10488,7 @@ " dtype='object')" ] }, - "execution_count": 248, + "execution_count": 295, "metadata": {}, "output_type": "execute_result" } @@ -10463,7 +10507,7 @@ }, { "cell_type": "code", - "execution_count": 249, + "execution_count": 296, "metadata": {}, "outputs": [ { @@ -10655,7 +10699,7 @@ }, { "cell_type": "code", - "execution_count": 250, + "execution_count": 297, "metadata": {}, "outputs": [], "source": [ @@ -10671,7 +10715,7 @@ }, { "cell_type": "code", - "execution_count": 251, + "execution_count": 298, "metadata": {}, "outputs": [ { @@ -10689,45 +10733,7 @@ "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:605: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n", " if is_sparse(pd_dtype):\n", "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:614: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n", - " if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Training Set Accuracy: 1.0\n", - "Test Set Accuracy: 0.7\n", - "\n", - "Training Set Classification Report:\n", - " precision recall f1-score support\n", - "\n", - " 0 1.00 1.00 1.00 36\n", - " 1 1.00 1.00 1.00 43\n", - "\n", - " accuracy 1.00 79\n", - " macro avg 1.00 1.00 1.00 79\n", - "weighted avg 1.00 1.00 1.00 79\n", - "\n", - "\n", - "Test Set Classification Report:\n", - " precision recall f1-score support\n", - "\n", - " 0 0.71 0.56 0.63 9\n", - " 1 0.69 0.82 0.75 11\n", - "\n", - " accuracy 0.70 20\n", - " macro avg 0.70 0.69 0.69 20\n", - "weighted avg 0.70 0.70 0.69 20\n", - "\n", - "\n", - "Model Parameters: {'memory': None, 'steps': [('scaler', StandardScaler()), ('MLP', MLPClassifier(max_iter=300, random_state=42))], 'verbose': False, 'scaler': StandardScaler(), 'MLP': MLPClassifier(max_iter=300, random_state=42), 'scaler__copy': True, 'scaler__with_mean': True, 'scaler__with_std': True, 'MLP__activation': 'relu', 'MLP__alpha': 0.0001, 'MLP__batch_size': 'auto', 'MLP__beta_1': 0.9, 'MLP__beta_2': 0.999, 'MLP__early_stopping': False, 'MLP__epsilon': 1e-08, 'MLP__hidden_layer_sizes': (100,), 'MLP__learning_rate': 'constant', 'MLP__learning_rate_init': 0.001, 'MLP__max_fun': 15000, 'MLP__max_iter': 300, 'MLP__momentum': 0.9, 'MLP__n_iter_no_change': 10, 'MLP__nesterovs_momentum': True, 'MLP__power_t': 0.5, 'MLP__random_state': 42, 'MLP__shuffle': True, 'MLP__solver': 'adam', 'MLP__tol': 0.0001, 'MLP__validation_fraction': 0.1, 'MLP__verbose': False, 'MLP__warm_start': False}\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ + " if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):\n", "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/neural_network/_multilayer_perceptron.py:686: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet.\n", " warnings.warn(\n", "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:767: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n", @@ -10815,6 +10821,38 @@ "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:614: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n", " if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):\n" ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Training Set Accuracy: 1.0\n", + "Test Set Accuracy: 0.7\n", + "\n", + "Training Set Classification Report:\n", + " precision recall f1-score support\n", + "\n", + " 0 1.00 1.00 1.00 36\n", + " 1 1.00 1.00 1.00 43\n", + "\n", + " accuracy 1.00 79\n", + " macro avg 1.00 1.00 1.00 79\n", + "weighted avg 1.00 1.00 1.00 79\n", + "\n", + "\n", + "Test Set Classification Report:\n", + " precision recall f1-score support\n", + "\n", + " 0 0.71 0.56 0.63 9\n", + " 1 0.69 0.82 0.75 11\n", + "\n", + " accuracy 0.70 20\n", + " macro avg 0.70 0.69 0.69 20\n", + "weighted avg 0.70 0.70 0.69 20\n", + "\n", + "\n", + "Model Parameters: {'memory': None, 'steps': [('scaler', StandardScaler()), ('MLP', MLPClassifier(max_iter=300, random_state=42))], 'verbose': False, 'scaler': StandardScaler(), 'MLP': MLPClassifier(max_iter=300, random_state=42), 'scaler__copy': True, 'scaler__with_mean': True, 'scaler__with_std': True, 'MLP__activation': 'relu', 'MLP__alpha': 0.0001, 'MLP__batch_size': 'auto', 'MLP__beta_1': 0.9, 'MLP__beta_2': 0.999, 'MLP__early_stopping': False, 'MLP__epsilon': 1e-08, 'MLP__hidden_layer_sizes': (100,), 'MLP__learning_rate': 'constant', 'MLP__learning_rate_init': 0.001, 'MLP__max_fun': 15000, 'MLP__max_iter': 300, 'MLP__momentum': 0.9, 'MLP__n_iter_no_change': 10, 'MLP__nesterovs_momentum': True, 'MLP__power_t': 0.5, 'MLP__random_state': 42, 'MLP__shuffle': True, 'MLP__solver': 'adam', 'MLP__tol': 0.0001, 'MLP__validation_fraction': 0.1, 'MLP__verbose': False, 'MLP__warm_start': False}\n" + ] } ], "source": [ @@ -10853,7 +10891,7 @@ }, { "cell_type": "code", - "execution_count": 252, + "execution_count": 299, "metadata": {}, "outputs": [ { @@ -10879,7 +10917,13 @@ "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:614: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n", " if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):\n", "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/neural_network/_multilayer_perceptron.py:1098: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n", - " y = column_or_1d(y, warn=True)\n", + " y = column_or_1d(y, warn=True)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/neural_network/_multilayer_perceptron.py:686: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet.\n", " warnings.warn(\n", "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:767: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n", @@ -19003,23 +19047,7 @@ "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:614: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n", " if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):\n", "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/neural_network/_multilayer_perceptron.py:1098: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n", - " y = column_or_1d(y, warn=True)\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Average Accuracy: 0.8282828282828283\n", - "Average Precision: 0.46464646464646464\n", - "Average Recall: 0.46464646464646464\n", - "Average F1 Score: 0.46464646464646464\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ + " y = column_or_1d(y, warn=True)\n", "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/neural_network/_multilayer_perceptron.py:686: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet.\n", " warnings.warn(\n", "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:767: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n", @@ -19105,7 +19133,23 @@ "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:614: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n", " if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):\n", "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/neural_network/_multilayer_perceptron.py:1098: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n", - " y = column_or_1d(y, warn=True)\n", + " y = column_or_1d(y, warn=True)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Average Accuracy: 0.8282828282828283\n", + "Average Precision: 0.46464646464646464\n", + "Average Recall: 0.46464646464646464\n", + "Average F1 Score: 0.46464646464646464\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/neural_network/_multilayer_perceptron.py:686: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet.\n", " warnings.warn(\n", "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:767: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n", @@ -19243,7 +19287,7 @@ }, { "cell_type": "code", - "execution_count": 253, + "execution_count": 300, "metadata": {}, "outputs": [ { @@ -19272,7 +19316,7 @@ " dtype='object')" ] }, - "execution_count": 253, + "execution_count": 300, "metadata": {}, "output_type": "execute_result" } @@ -19289,41 +19333,9 @@ }, { "cell_type": "code", - "execution_count": 254, + "execution_count": 301, "metadata": {}, "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Training Set Accuracy: 1.0\n", - "Test Set Accuracy: 0.7\n", - "\n", - "Training Set Classification Report:\n", - " precision recall f1-score support\n", - "\n", - " 0 1.00 1.00 1.00 36\n", - " 1 1.00 1.00 1.00 43\n", - "\n", - " accuracy 1.00 79\n", - " macro avg 1.00 1.00 1.00 79\n", - "weighted avg 1.00 1.00 1.00 79\n", - "\n", - "\n", - "Test Set Classification Report:\n", - " precision recall f1-score support\n", - "\n", - " 0 0.71 0.56 0.63 9\n", - " 1 0.69 0.82 0.75 11\n", - "\n", - " accuracy 0.70 20\n", - " macro avg 0.70 0.69 0.69 20\n", - "weighted avg 0.70 0.70 0.69 20\n", - "\n", - "\n", - "Model Parameters: {'memory': None, 'steps': [('scaler', StandardScaler()), ('MLP', MLPClassifier(max_iter=300, random_state=42))], 'verbose': False, 'scaler': StandardScaler(), 'MLP': MLPClassifier(max_iter=300, random_state=42), 'scaler__copy': True, 'scaler__with_mean': True, 'scaler__with_std': True, 'MLP__activation': 'relu', 'MLP__alpha': 0.0001, 'MLP__batch_size': 'auto', 'MLP__beta_1': 0.9, 'MLP__beta_2': 0.999, 'MLP__early_stopping': False, 'MLP__epsilon': 1e-08, 'MLP__hidden_layer_sizes': (100,), 'MLP__learning_rate': 'constant', 'MLP__learning_rate_init': 0.001, 'MLP__max_fun': 15000, 'MLP__max_iter': 300, 'MLP__momentum': 0.9, 'MLP__n_iter_no_change': 10, 'MLP__nesterovs_momentum': True, 'MLP__power_t': 0.5, 'MLP__random_state': 42, 'MLP__shuffle': True, 'MLP__solver': 'adam', 'MLP__tol': 0.0001, 'MLP__validation_fraction': 0.1, 'MLP__verbose': False, 'MLP__warm_start': False}\n" - ] - }, { "name": "stderr", "output_type": "stream", @@ -19383,7 +19395,45 @@ "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:605: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n", " if is_sparse(pd_dtype):\n", "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:614: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n", - " if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):\n", + " if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Training Set Accuracy: 1.0\n", + "Test Set Accuracy: 0.7\n", + "\n", + "Training Set Classification Report:\n", + " precision recall f1-score support\n", + "\n", + " 0 1.00 1.00 1.00 36\n", + " 1 1.00 1.00 1.00 43\n", + "\n", + " accuracy 1.00 79\n", + " macro avg 1.00 1.00 1.00 79\n", + "weighted avg 1.00 1.00 1.00 79\n", + "\n", + "\n", + "Test Set Classification Report:\n", + " precision recall f1-score support\n", + "\n", + " 0 0.71 0.56 0.63 9\n", + " 1 0.69 0.82 0.75 11\n", + "\n", + " accuracy 0.70 20\n", + " macro avg 0.70 0.69 0.69 20\n", + "weighted avg 0.70 0.70 0.69 20\n", + "\n", + "\n", + "Model Parameters: {'memory': None, 'steps': [('scaler', StandardScaler()), ('MLP', MLPClassifier(max_iter=300, random_state=42))], 'verbose': False, 'scaler': StandardScaler(), 'MLP': MLPClassifier(max_iter=300, random_state=42), 'scaler__copy': True, 'scaler__with_mean': True, 'scaler__with_std': True, 'MLP__activation': 'relu', 'MLP__alpha': 0.0001, 'MLP__batch_size': 'auto', 'MLP__beta_1': 0.9, 'MLP__beta_2': 0.999, 'MLP__early_stopping': False, 'MLP__epsilon': 1e-08, 'MLP__hidden_layer_sizes': (100,), 'MLP__learning_rate': 'constant', 'MLP__learning_rate_init': 0.001, 'MLP__max_fun': 15000, 'MLP__max_iter': 300, 'MLP__momentum': 0.9, 'MLP__n_iter_no_change': 10, 'MLP__nesterovs_momentum': True, 'MLP__power_t': 0.5, 'MLP__random_state': 42, 'MLP__shuffle': True, 'MLP__solver': 'adam', 'MLP__tol': 0.0001, 'MLP__validation_fraction': 0.1, 'MLP__verbose': False, 'MLP__warm_start': False}\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:767: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n", " if not hasattr(array, \"sparse\") and array.dtypes.apply(is_sparse).any():\n", "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:605: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n",