From a15c9d82b3fbdd0165c0bcfbacc6ae0f1a20ef8f Mon Sep 17 00:00:00 2001
From: Ferdinand <ferdinand.hoelzl@studium.uni-hamburg.de>
Date: Mon, 8 Apr 2024 18:56:24 +0200
Subject: [PATCH] Residual
---
analysis.ipynb | 678 ++++++++++++++++++++++++++-----------------------
1 file changed, 364 insertions(+), 314 deletions(-)
diff --git a/analysis.ipynb b/analysis.ipynb
index c6c1a1c..827d826 100644
--- a/analysis.ipynb
+++ b/analysis.ipynb
@@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
- "execution_count": 208,
+ "execution_count": 255,
"metadata": {},
"outputs": [],
"source": [
@@ -41,7 +41,7 @@
},
{
"cell_type": "code",
- "execution_count": 209,
+ "execution_count": 256,
"metadata": {},
"outputs": [
{
@@ -1054,7 +1054,7 @@
"[99 rows x 39 columns]"
]
},
- "execution_count": 209,
+ "execution_count": 256,
"metadata": {},
"output_type": "execute_result"
}
@@ -1082,7 +1082,7 @@
},
{
"cell_type": "code",
- "execution_count": 210,
+ "execution_count": 257,
"metadata": {},
"outputs": [],
"source": [
@@ -1116,7 +1116,7 @@
},
{
"cell_type": "code",
- "execution_count": 211,
+ "execution_count": 258,
"metadata": {},
"outputs": [
{
@@ -1416,7 +1416,7 @@
"max 4.000000 7.208333 0.277313 "
]
},
- "execution_count": 211,
+ "execution_count": 258,
"metadata": {},
"output_type": "execute_result"
}
@@ -1427,7 +1427,7 @@
},
{
"cell_type": "code",
- "execution_count": 212,
+ "execution_count": 259,
"metadata": {},
"outputs": [
{
@@ -1499,7 +1499,7 @@
},
{
"cell_type": "code",
- "execution_count": 213,
+ "execution_count": 260,
"metadata": {},
"outputs": [
{
@@ -1580,7 +1580,7 @@
},
{
"cell_type": "code",
- "execution_count": 214,
+ "execution_count": 261,
"metadata": {},
"outputs": [
{
@@ -1655,7 +1655,7 @@
},
{
"cell_type": "code",
- "execution_count": 215,
+ "execution_count": 262,
"metadata": {},
"outputs": [
{
@@ -1993,7 +1993,7 @@
},
{
"cell_type": "code",
- "execution_count": 216,
+ "execution_count": 263,
"metadata": {},
"outputs": [
{
@@ -2012,7 +2012,7 @@
"Text(0.5, 1.0, 'Density Plot of Art_Score ')"
]
},
- "execution_count": 216,
+ "execution_count": 263,
"metadata": {},
"output_type": "execute_result"
},
@@ -2037,7 +2037,7 @@
},
{
"cell_type": "code",
- "execution_count": 217,
+ "execution_count": 264,
"metadata": {},
"outputs": [
{
@@ -2071,7 +2071,7 @@
},
{
"cell_type": "code",
- "execution_count": 218,
+ "execution_count": 265,
"metadata": {},
"outputs": [
{
@@ -2105,7 +2105,7 @@
},
{
"cell_type": "code",
- "execution_count": 219,
+ "execution_count": 266,
"metadata": {},
"outputs": [
{
@@ -2153,7 +2153,7 @@
},
{
"cell_type": "code",
- "execution_count": 220,
+ "execution_count": 267,
"metadata": {},
"outputs": [
{
@@ -2227,7 +2227,7 @@
},
{
"cell_type": "code",
- "execution_count": 221,
+ "execution_count": 268,
"metadata": {},
"outputs": [
{
@@ -2245,13 +2245,7 @@
"/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:605: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n",
" if is_sparse(pd_dtype):\n",
"/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:614: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n",
- " if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
+ " if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):\n",
"/var/folders/lj/q1ndb2493275c2y0vzyplnk00000gn/T/ipykernel_25655/2287162676.py:19: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
"Try using .loc[row_indexer,col_indexer] = value instead\n",
@@ -2299,7 +2293,7 @@
},
{
"cell_type": "code",
- "execution_count": 222,
+ "execution_count": 269,
"metadata": {},
"outputs": [
{
@@ -2340,7 +2334,7 @@
},
{
"cell_type": "code",
- "execution_count": 223,
+ "execution_count": 270,
"metadata": {},
"outputs": [
{
@@ -2435,12 +2429,12 @@
" <td>4</td>\n",
" <td>2.565476</td>\n",
" <td>0.000000</td>\n",
+ " <td>3</td>\n",
" <td>1</td>\n",
- " <td>0</td>\n",
" <td>2</td>\n",
" <td>0</td>\n",
- " <td>4</td>\n",
- " <td>0</td>\n",
+ " <td>3</td>\n",
+ " <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>HafenCity</th>\n",
@@ -2461,12 +2455,12 @@
" <td>1</td>\n",
" <td>1.952381</td>\n",
" <td>0.005758</td>\n",
+ " <td>3</td>\n",
" <td>1</td>\n",
- " <td>0</td>\n",
- " <td>1</td>\n",
- " <td>0</td>\n",
" <td>4</td>\n",
" <td>0</td>\n",
+ " <td>3</td>\n",
+ " <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Neustadt</th>\n",
@@ -2487,12 +2481,12 @@
" <td>4</td>\n",
" <td>2.702381</td>\n",
" <td>0.001174</td>\n",
+ " <td>3</td>\n",
" <td>1</td>\n",
- " <td>0</td>\n",
" <td>2</td>\n",
" <td>0</td>\n",
- " <td>4</td>\n",
- " <td>0</td>\n",
+ " <td>3</td>\n",
+ " <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>St. Pauli</th>\n",
@@ -2513,12 +2507,12 @@
" <td>2</td>\n",
" <td>1.851190</td>\n",
" <td>0.009631</td>\n",
+ " <td>3</td>\n",
" <td>1</td>\n",
- " <td>0</td>\n",
" <td>2</td>\n",
" <td>0</td>\n",
- " <td>4</td>\n",
- " <td>0</td>\n",
+ " <td>3</td>\n",
+ " <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>St. Georg</th>\n",
@@ -2539,12 +2533,12 @@
" <td>4</td>\n",
" <td>2.523810</td>\n",
" <td>0.008493</td>\n",
+ " <td>3</td>\n",
" <td>1</td>\n",
- " <td>0</td>\n",
" <td>2</td>\n",
" <td>0</td>\n",
- " <td>4</td>\n",
- " <td>0</td>\n",
+ " <td>3</td>\n",
+ " <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
@@ -2591,12 +2585,12 @@
" <td>0</td>\n",
" <td>0.422619</td>\n",
" <td>0.104116</td>\n",
+ " <td>2</td>\n",
" <td>0</td>\n",
- " <td>1</td>\n",
- " <td>1</td>\n",
" <td>0</td>\n",
- " <td>3</td>\n",
- " <td>1</td>\n",
+ " <td>0</td>\n",
+ " <td>2</td>\n",
+ " <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Neugraben-Fischbek</th>\n",
@@ -2617,12 +2611,12 @@
" <td>0</td>\n",
" <td>1.857143</td>\n",
" <td>0.130211</td>\n",
+ " <td>2</td>\n",
" <td>0</td>\n",
- " <td>1</td>\n",
- " <td>1</td>\n",
" <td>0</td>\n",
- " <td>3</td>\n",
- " <td>1</td>\n",
+ " <td>0</td>\n",
+ " <td>2</td>\n",
+ " <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Francop</th>\n",
@@ -2643,12 +2637,12 @@
" <td>0</td>\n",
" <td>0.000000</td>\n",
" <td>0.114626</td>\n",
- " <td>2</td>\n",
- " <td>1</td>\n",
- " <td>1</td>\n",
- " <td>1</td>\n",
+ " <td>0</td>\n",
+ " <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
+ " <td>0</td>\n",
+ " <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Neuenfelde</th>\n",
@@ -2669,12 +2663,12 @@
" <td>0</td>\n",
" <td>0.375000</td>\n",
" <td>0.162791</td>\n",
+ " <td>2</td>\n",
" <td>0</td>\n",
- " <td>1</td>\n",
- " <td>1</td>\n",
" <td>0</td>\n",
- " <td>3</td>\n",
- " <td>1</td>\n",
+ " <td>0</td>\n",
+ " <td>2</td>\n",
+ " <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Cranz</th>\n",
@@ -2695,12 +2689,12 @@
" <td>0</td>\n",
" <td>0.000000</td>\n",
" <td>0.200686</td>\n",
- " <td>2</td>\n",
- " <td>1</td>\n",
- " <td>1</td>\n",
- " <td>1</td>\n",
+ " <td>0</td>\n",
+ " <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
+ " <td>0</td>\n",
+ " <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
@@ -2864,36 +2858,36 @@
"\n",
" KMeans_5 KMeans_2 Spectral_5 Spectral_2 Agglo_5 \\\n",
"Stadtteil \n",
- "Hamburg-Altstadt 1 0 2 0 4 \n",
- "HafenCity 1 0 1 0 4 \n",
- "Neustadt 1 0 2 0 4 \n",
- "St. Pauli 1 0 2 0 4 \n",
- "St. Georg 1 0 2 0 4 \n",
+ "Hamburg-Altstadt 3 1 2 0 3 \n",
+ "HafenCity 3 1 4 0 3 \n",
+ "Neustadt 3 1 2 0 3 \n",
+ "St. Pauli 3 1 2 0 3 \n",
+ "St. Georg 3 1 2 0 3 \n",
"... ... ... ... ... ... \n",
- "Hausbruch 0 1 1 0 3 \n",
- "Neugraben-Fischbek 0 1 1 0 3 \n",
- "Francop 2 1 1 1 0 \n",
- "Neuenfelde 0 1 1 0 3 \n",
- "Cranz 2 1 1 1 0 \n",
+ "Hausbruch 2 0 0 0 2 \n",
+ "Neugraben-Fischbek 2 0 0 0 2 \n",
+ "Francop 0 0 0 1 0 \n",
+ "Neuenfelde 2 0 0 0 2 \n",
+ "Cranz 0 0 0 1 0 \n",
"\n",
" Agglo_2 \n",
"Stadtteil \n",
- "Hamburg-Altstadt 0 \n",
- "HafenCity 0 \n",
- "Neustadt 0 \n",
- "St. Pauli 0 \n",
- "St. Georg 0 \n",
+ "Hamburg-Altstadt 1 \n",
+ "HafenCity 1 \n",
+ "Neustadt 1 \n",
+ "St. Pauli 1 \n",
+ "St. Georg 1 \n",
"... ... \n",
- "Hausbruch 1 \n",
- "Neugraben-Fischbek 1 \n",
- "Francop 1 \n",
- "Neuenfelde 1 \n",
- "Cranz 1 \n",
+ "Hausbruch 0 \n",
+ "Neugraben-Fischbek 0 \n",
+ "Francop 0 \n",
+ "Neuenfelde 0 \n",
+ "Cranz 0 \n",
"\n",
"[99 rows x 23 columns]"
]
},
- "execution_count": 223,
+ "execution_count": 270,
"metadata": {},
"output_type": "execute_result"
}
@@ -2904,7 +2898,7 @@
},
{
"cell_type": "code",
- "execution_count": 224,
+ "execution_count": 271,
"metadata": {},
"outputs": [],
"source": [
@@ -2936,7 +2930,7 @@
},
{
"cell_type": "code",
- "execution_count": 225,
+ "execution_count": 272,
"metadata": {},
"outputs": [
{
@@ -2953,7 +2947,7 @@
" dtype='object')"
]
},
- "execution_count": 225,
+ "execution_count": 272,
"metadata": {},
"output_type": "execute_result"
}
@@ -2979,7 +2973,7 @@
},
{
"cell_type": "code",
- "execution_count": 226,
+ "execution_count": 273,
"metadata": {},
"outputs": [
{
@@ -3076,7 +3070,7 @@
},
{
"cell_type": "code",
- "execution_count": 227,
+ "execution_count": 274,
"metadata": {},
"outputs": [
{
@@ -3125,7 +3119,7 @@
},
{
"cell_type": "code",
- "execution_count": 228,
+ "execution_count": 275,
"metadata": {},
"outputs": [
{
@@ -3149,18 +3143,6 @@
"/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:605: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n",
" if is_sparse(pd_dtype):\n",
"/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:614: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n",
- " if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):\n",
- "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:767: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n",
- " if not hasattr(array, \"sparse\") and array.dtypes.apply(is_sparse).any():\n",
- "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:605: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n",
- " if is_sparse(pd_dtype):\n",
- "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:614: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n",
- " if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):\n",
- "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:767: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n",
- " if not hasattr(array, \"sparse\") and array.dtypes.apply(is_sparse).any():\n",
- "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:605: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n",
- " if is_sparse(pd_dtype):\n",
- "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:614: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n",
" if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):\n"
]
},
@@ -3169,21 +3151,21 @@
"output_type": "stream",
"text": [
"Test Data Evaluation:\n",
- "R² Score: -0.4464530491116274\n",
- "Mean Absolute Error (MAE): 1.0651785714285713\n",
- "Mean Squared Error (MSE): 1.9987404336734695\n",
- "Root Mean Squared Error (RMSE): 1.413768168291205\n",
+ "R² Score: -0.5726397634494289\n",
+ "Mean Absolute Error (MAE): 1.1434523809523809\n",
+ "Mean Squared Error (MSE): 2.173107993197279\n",
+ "Root Mean Squared Error (RMSE): 1.4741465304362653\n",
" Coefficient\n",
- "Anteil Kinder und Jugendlicher unter 18 Jahren ... 0.091625\n",
- "Anteil älterer Einwohner: innen über 64 Jahren ... 0.066265\n",
- "Einwohner: innen je km² 0.310324\n",
- "Gesamtbetrag Einkünfte Median - [€] 0.023681\n",
- "Anteil der Sozial-wohnungen an allen Wohnungen 0.030918\n",
- "Durch-schnittliche Wohnfläche je Einwohner:in i... 0.126750\n",
- "Durch-schnittliche Anzahl der Personen je Haushalt 0.041658\n",
- "organic_restaurants_count 0.000007\n",
+ "Anteil Kinder und Jugendlicher unter 18 Jahren ... 0.131468\n",
+ "Anteil älterer Einwohner: innen über 64 Jahren ... 0.001418\n",
+ "Einwohner: innen je km² 0.308173\n",
+ "Gesamtbetrag Einkünfte Median - [€] 0.127030\n",
+ "Anteil der Sozial-wohnungen an allen Wohnungen 0.029023\n",
+ "Durch-schnittliche Wohnfläche je Einwohner:in i... 0.025656\n",
+ "Durch-schnittliche Anzahl der Personen je Haushalt 0.004799\n",
+ "organic_restaurants_count 0.001845\n",
"vegan_restaurants_count 0.243526\n",
- "distance_rathaus 0.065245\n"
+ "distance_rathaus 0.127063\n"
]
},
{
@@ -3207,6 +3189,18 @@
"/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:605: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n",
" if is_sparse(pd_dtype):\n",
"/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:614: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n",
+ " if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):\n",
+ "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:767: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n",
+ " if not hasattr(array, \"sparse\") and array.dtypes.apply(is_sparse).any():\n",
+ "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:605: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n",
+ " if is_sparse(pd_dtype):\n",
+ "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:614: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n",
+ " if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):\n",
+ "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:767: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n",
+ " if not hasattr(array, \"sparse\") and array.dtypes.apply(is_sparse).any():\n",
+ "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:605: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n",
+ " if is_sparse(pd_dtype):\n",
+ "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:614: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n",
" if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):\n"
]
}
@@ -3260,7 +3254,7 @@
},
{
"cell_type": "code",
- "execution_count": 229,
+ "execution_count": 276,
"metadata": {},
"outputs": [
{
@@ -3363,7 +3357,7 @@
},
{
"cell_type": "code",
- "execution_count": 230,
+ "execution_count": 277,
"metadata": {},
"outputs": [
{
@@ -3470,7 +3464,7 @@
},
{
"cell_type": "code",
- "execution_count": 231,
+ "execution_count": 278,
"metadata": {},
"outputs": [
{
@@ -3487,7 +3481,7 @@
" dtype='object')"
]
},
- "execution_count": 231,
+ "execution_count": 278,
"metadata": {},
"output_type": "execute_result"
}
@@ -3503,7 +3497,7 @@
},
{
"cell_type": "code",
- "execution_count": 232,
+ "execution_count": 279,
"metadata": {},
"outputs": [
{
@@ -3907,13 +3901,7 @@
"/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:614: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n",
" if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):\n",
"/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/neural_network/_multilayer_perceptron.py:1623: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
- " y = column_or_1d(y, warn=True)\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
+ " y = column_or_1d(y, warn=True)\n",
"/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/neural_network/_multilayer_perceptron.py:686: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (10) reached and the optimization hasn't converged yet.\n",
" warnings.warn(\n",
"/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:767: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n",
@@ -4237,7 +4225,13 @@
"/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:614: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n",
" if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):\n",
"/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/neural_network/_multilayer_perceptron.py:1623: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
- " y = column_or_1d(y, warn=True)\n",
+ " y = column_or_1d(y, warn=True)\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
"/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/neural_network/_multilayer_perceptron.py:686: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (10) reached and the optimization hasn't converged yet.\n",
" warnings.warn(\n",
"/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:767: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n",
@@ -8909,7 +8903,7 @@
},
{
"cell_type": "code",
- "execution_count": 233,
+ "execution_count": 280,
"metadata": {},
"outputs": [
{
@@ -9003,7 +8997,7 @@
"max 36.786614 6.065197 6.065197"
]
},
- "execution_count": 233,
+ "execution_count": 280,
"metadata": {},
"output_type": "execute_result"
}
@@ -9024,7 +9018,7 @@
},
{
"cell_type": "code",
- "execution_count": 234,
+ "execution_count": 281,
"metadata": {},
"outputs": [
{
@@ -9055,7 +9049,7 @@
" dtype='object')"
]
},
- "execution_count": 234,
+ "execution_count": 281,
"metadata": {},
"output_type": "execute_result"
}
@@ -9070,7 +9064,7 @@
},
{
"cell_type": "code",
- "execution_count": 235,
+ "execution_count": 282,
"metadata": {},
"outputs": [
{
@@ -9177,7 +9171,7 @@
},
{
"cell_type": "code",
- "execution_count": 236,
+ "execution_count": 283,
"metadata": {},
"outputs": [
{
@@ -9191,7 +9185,7 @@
" dtype='object')"
]
},
- "execution_count": 236,
+ "execution_count": 283,
"metadata": {},
"output_type": "execute_result"
}
@@ -9221,40 +9215,36 @@
},
{
"cell_type": "code",
- "execution_count": 237,
+ "execution_count": 304,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
- "Training Set Accuracy: 0.8227848101265823\n",
- "Test Set Accuracy: 0.55\n",
+ "Training Set Accuracy: 0.8734177215189873\n",
+ "Test Set Accuracy: 0.85\n",
"\n",
"Training Set Classification Report:\n",
" precision recall f1-score support\n",
"\n",
- " 0 0.84 0.89 0.86 36\n",
- " 1 0.77 0.82 0.79 33\n",
- " 2 1.00 0.80 0.89 5\n",
- " 3 1.00 0.25 0.40 4\n",
- " 4 1.00 1.00 1.00 1\n",
+ " 0 0.86 0.86 0.86 36\n",
+ " 1 0.88 0.88 0.88 43\n",
"\n",
- " accuracy 0.82 79\n",
- " macro avg 0.92 0.75 0.79 79\n",
- "weighted avg 0.83 0.82 0.82 79\n",
+ " accuracy 0.87 79\n",
+ " macro avg 0.87 0.87 0.87 79\n",
+ "weighted avg 0.87 0.87 0.87 79\n",
"\n",
"\n",
"Test Set Classification Report:\n",
" precision recall f1-score support\n",
"\n",
- " 0 0.70 0.78 0.74 9\n",
- " 1 0.40 0.57 0.47 7\n",
- " 2 0.00 0.00 0.00 4\n",
+ " 0 0.88 0.78 0.82 9\n",
+ " 1 0.83 0.91 0.87 11\n",
"\n",
- " accuracy 0.55 20\n",
- " macro avg 0.37 0.45 0.40 20\n",
- "weighted avg 0.45 0.55 0.50 20\n",
+ " accuracy 0.85 20\n",
+ " macro avg 0.85 0.84 0.85 20\n",
+ "weighted avg 0.85 0.85 0.85 20\n",
"\n",
"\n",
"Model Parameters: {'memory': None, 'steps': [('scaler', StandardScaler()), ('LogM', LogisticRegression())], 'verbose': False, 'scaler': StandardScaler(), 'LogM': LogisticRegression(), 'scaler__copy': True, 'scaler__with_mean': True, 'scaler__with_std': True, 'LogM__C': 1.0, 'LogM__class_weight': None, 'LogM__dual': False, 'LogM__fit_intercept': True, 'LogM__intercept_scaling': 1, 'LogM__l1_ratio': None, 'LogM__max_iter': 100, 'LogM__multi_class': 'auto', 'LogM__n_jobs': None, 'LogM__penalty': 'l2', 'LogM__random_state': None, 'LogM__solver': 'lbfgs', 'LogM__tol': 0.0001, 'LogM__verbose': 0, 'LogM__warm_start': False}\n"
@@ -9379,14 +9369,21 @@
"/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:605: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n",
" if is_sparse(pd_dtype):\n",
"/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:614: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n",
- " if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):\n",
- "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/metrics/_classification.py:1344: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n",
- " _warn_prf(average, modifier, msg_start, len(result))\n",
- "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/metrics/_classification.py:1344: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n",
- " _warn_prf(average, modifier, msg_start, len(result))\n",
- "/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/metrics/_classification.py:1344: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n",
- " _warn_prf(average, modifier, msg_start, len(result))\n"
+ " if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):\n"
]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "array([0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1,\n",
+ " 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1,\n",
+ " 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1,\n",
+ " 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1])"
+ ]
+ },
+ "execution_count": 304,
+ "metadata": {},
+ "output_type": "execute_result"
}
],
"source": [
@@ -9428,7 +9425,53 @@
},
{
"cell_type": "code",
- "execution_count": 238,
+ "execution_count": 314,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/var/folders/lj/q1ndb2493275c2y0vzyplnk00000gn/T/ipykernel_25655/4287676306.py:4: SettingWithCopyWarning: \n",
+ "A value is trying to be set on a copy of a slice from a DataFrame.\n",
+ "Try using .loc[row_indexer,col_indexer] = value instead\n",
+ "\n",
+ "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
+ " data_df['y_pred'] = pd.concat([y_pred_train_series, y_pred_test_series])\n",
+ "/var/folders/lj/q1ndb2493275c2y0vzyplnk00000gn/T/ipykernel_25655/4287676306.py:5: SettingWithCopyWarning: \n",
+ "A value is trying to be set on a copy of a slice from a DataFrame.\n",
+ "Try using .loc[row_indexer,col_indexer] = value instead\n",
+ "\n",
+ "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
+ " data_df['residuals'] = data_df['market_count'] - data_df['y_pred']\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "Index(['Altona-Nord', 'Bahrenfeld', 'Othmarschen', 'Marienthal',\n",
+ " 'Hummelsbüttel', 'Eißendorf', 'Heimfeld'],\n",
+ " dtype='object', name='Stadtteil')"
+ ]
+ },
+ "execution_count": 314,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "y_pred_train_series = pd.Series(y_pred_train, index=y_train.index)\n",
+ "y_pred_test_series = pd.Series(y_pred_test, index=y_test.index)\n",
+ "\n",
+ "data_df['y_pred'] = pd.concat([y_pred_train_series, y_pred_test_series])\n",
+ "data_df['residuals'] = data_df['market_count'] - data_df['y_pred']\n",
+ "\n",
+ "data_df[data_df['residuals'] < 0].index"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 285,
"metadata": {},
"outputs": [
{
@@ -9501,7 +9544,7 @@
},
{
"cell_type": "code",
- "execution_count": 239,
+ "execution_count": 286,
"metadata": {},
"outputs": [
{
@@ -9578,7 +9621,7 @@
},
{
"cell_type": "code",
- "execution_count": 240,
+ "execution_count": 287,
"metadata": {},
"outputs": [
{
@@ -9587,15 +9630,15 @@
"text": [
" precision recall f1-score support\n",
"\n",
- " 0 0.59 0.22 0.32 45\n",
- " 1 0.35 0.23 0.27 40\n",
- " 2 0.05 0.11 0.06 9\n",
- " 3 0.11 0.50 0.18 4\n",
+ " 0 0.40 0.18 0.25 45\n",
+ " 1 0.36 0.20 0.26 40\n",
+ " 2 0.00 0.00 0.00 9\n",
+ " 3 0.05 0.25 0.08 4\n",
" 4 0.00 0.00 0.00 1\n",
"\n",
- " accuracy 0.22 99\n",
- " macro avg 0.22 0.21 0.17 99\n",
- "weighted avg 0.42 0.22 0.27 99\n",
+ " accuracy 0.17 99\n",
+ " macro avg 0.16 0.13 0.12 99\n",
+ "weighted avg 0.33 0.17 0.22 99\n",
"\n"
]
},
@@ -9637,7 +9680,7 @@
},
{
"cell_type": "code",
- "execution_count": 241,
+ "execution_count": 288,
"metadata": {},
"outputs": [
{
@@ -9663,7 +9706,7 @@
},
{
"cell_type": "code",
- "execution_count": 242,
+ "execution_count": 289,
"metadata": {},
"outputs": [
{
@@ -9880,7 +9923,7 @@
},
{
"cell_type": "code",
- "execution_count": 243,
+ "execution_count": 290,
"metadata": {},
"outputs": [
{
@@ -9934,7 +9977,7 @@
},
{
"cell_type": "code",
- "execution_count": 244,
+ "execution_count": 291,
"metadata": {},
"outputs": [
{
@@ -9948,7 +9991,7 @@
" dtype='object')"
]
},
- "execution_count": 244,
+ "execution_count": 291,
"metadata": {},
"output_type": "execute_result"
}
@@ -9980,7 +10023,7 @@
},
{
"cell_type": "code",
- "execution_count": 245,
+ "execution_count": 292,
"metadata": {},
"outputs": [
{
@@ -9988,7 +10031,7 @@
"output_type": "stream",
"text": [
"Training Set Accuracy: 1.0\n",
- "Test Set Accuracy: 0.5\n",
+ "Test Set Accuracy: 0.55\n",
"\n",
"Training Set Classification Report:\n",
" precision recall f1-score support\n",
@@ -10007,14 +10050,15 @@
"Test Set Classification Report:\n",
" precision recall f1-score support\n",
"\n",
- " 0 0.78 0.78 0.78 9\n",
- " 1 0.38 0.43 0.40 7\n",
+ " 0 0.88 0.78 0.82 9\n",
+ " 1 0.50 0.57 0.53 7\n",
" 2 0.00 0.00 0.00 4\n",
+ " 3 0.00 0.00 0.00 0\n",
" 4 0.00 0.00 0.00 0\n",
"\n",
- " accuracy 0.50 20\n",
- " macro avg 0.29 0.30 0.29 20\n",
- "weighted avg 0.48 0.50 0.49 20\n",
+ " accuracy 0.55 20\n",
+ " macro avg 0.28 0.27 0.27 20\n",
+ "weighted avg 0.57 0.55 0.56 20\n",
"\n",
"\n",
"Model Parameters: {'memory': None, 'steps': [('scaler', StandardScaler()), ('DTC', DecisionTreeClassifier())], 'verbose': False, 'scaler': StandardScaler(), 'DTC': DecisionTreeClassifier(), 'scaler__copy': True, 'scaler__with_mean': True, 'scaler__with_std': True, 'DTC__ccp_alpha': 0.0, 'DTC__class_weight': None, 'DTC__criterion': 'gini', 'DTC__max_depth': None, 'DTC__max_features': None, 'DTC__max_leaf_nodes': None, 'DTC__min_impurity_decrease': 0.0, 'DTC__min_samples_leaf': 1, 'DTC__min_samples_split': 2, 'DTC__min_weight_fraction_leaf': 0.0, 'DTC__random_state': None, 'DTC__splitter': 'best'}\n"
@@ -10184,7 +10228,7 @@
},
{
"cell_type": "code",
- "execution_count": 246,
+ "execution_count": 293,
"metadata": {},
"outputs": [
{
@@ -10198,7 +10242,7 @@
" dtype='object')"
]
},
- "execution_count": 246,
+ "execution_count": 293,
"metadata": {},
"output_type": "execute_result"
}
@@ -10230,41 +10274,9 @@
},
{
"cell_type": "code",
- "execution_count": 247,
+ "execution_count": 294,
"metadata": {},
"outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Training Set Accuracy: 1.0\n",
- "Test Set Accuracy: 0.85\n",
- "\n",
- "Training Set Classification Report:\n",
- " precision recall f1-score support\n",
- "\n",
- " 0 1.00 1.00 1.00 36\n",
- " 1 1.00 1.00 1.00 43\n",
- "\n",
- " accuracy 1.00 79\n",
- " macro avg 1.00 1.00 1.00 79\n",
- "weighted avg 1.00 1.00 1.00 79\n",
- "\n",
- "\n",
- "Test Set Classification Report:\n",
- " precision recall f1-score support\n",
- "\n",
- " 0 0.88 0.78 0.82 9\n",
- " 1 0.83 0.91 0.87 11\n",
- "\n",
- " accuracy 0.85 20\n",
- " macro avg 0.85 0.84 0.85 20\n",
- "weighted avg 0.85 0.85 0.85 20\n",
- "\n",
- "\n",
- "Model Parameters: {'memory': None, 'steps': [('scaler', StandardScaler()), ('DTC', DecisionTreeClassifier())], 'verbose': False, 'scaler': StandardScaler(), 'DTC': DecisionTreeClassifier(), 'scaler__copy': True, 'scaler__with_mean': True, 'scaler__with_std': True, 'DTC__ccp_alpha': 0.0, 'DTC__class_weight': None, 'DTC__criterion': 'gini', 'DTC__max_depth': None, 'DTC__max_features': None, 'DTC__max_leaf_nodes': None, 'DTC__min_impurity_decrease': 0.0, 'DTC__min_samples_leaf': 1, 'DTC__min_samples_split': 2, 'DTC__min_weight_fraction_leaf': 0.0, 'DTC__random_state': None, 'DTC__splitter': 'best'}\n"
- ]
- },
{
"name": "stderr",
"output_type": "stream",
@@ -10384,6 +10396,38 @@
"/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:614: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n",
" if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):\n"
]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Training Set Accuracy: 1.0\n",
+ "Test Set Accuracy: 0.8\n",
+ "\n",
+ "Training Set Classification Report:\n",
+ " precision recall f1-score support\n",
+ "\n",
+ " 0 1.00 1.00 1.00 36\n",
+ " 1 1.00 1.00 1.00 43\n",
+ "\n",
+ " accuracy 1.00 79\n",
+ " macro avg 1.00 1.00 1.00 79\n",
+ "weighted avg 1.00 1.00 1.00 79\n",
+ "\n",
+ "\n",
+ "Test Set Classification Report:\n",
+ " precision recall f1-score support\n",
+ "\n",
+ " 0 0.78 0.78 0.78 9\n",
+ " 1 0.82 0.82 0.82 11\n",
+ "\n",
+ " accuracy 0.80 20\n",
+ " macro avg 0.80 0.80 0.80 20\n",
+ "weighted avg 0.80 0.80 0.80 20\n",
+ "\n",
+ "\n",
+ "Model Parameters: {'memory': None, 'steps': [('scaler', StandardScaler()), ('DTC', DecisionTreeClassifier())], 'verbose': False, 'scaler': StandardScaler(), 'DTC': DecisionTreeClassifier(), 'scaler__copy': True, 'scaler__with_mean': True, 'scaler__with_std': True, 'DTC__ccp_alpha': 0.0, 'DTC__class_weight': None, 'DTC__criterion': 'gini', 'DTC__max_depth': None, 'DTC__max_features': None, 'DTC__max_leaf_nodes': None, 'DTC__min_impurity_decrease': 0.0, 'DTC__min_samples_leaf': 1, 'DTC__min_samples_split': 2, 'DTC__min_weight_fraction_leaf': 0.0, 'DTC__random_state': None, 'DTC__splitter': 'best'}\n"
+ ]
}
],
"source": [
@@ -10430,7 +10474,7 @@
},
{
"cell_type": "code",
- "execution_count": 248,
+ "execution_count": 295,
"metadata": {},
"outputs": [
{
@@ -10444,7 +10488,7 @@
" dtype='object')"
]
},
- "execution_count": 248,
+ "execution_count": 295,
"metadata": {},
"output_type": "execute_result"
}
@@ -10463,7 +10507,7 @@
},
{
"cell_type": "code",
- "execution_count": 249,
+ "execution_count": 296,
"metadata": {},
"outputs": [
{
@@ -10655,7 +10699,7 @@
},
{
"cell_type": "code",
- "execution_count": 250,
+ "execution_count": 297,
"metadata": {},
"outputs": [],
"source": [
@@ -10671,7 +10715,7 @@
},
{
"cell_type": "code",
- "execution_count": 251,
+ "execution_count": 298,
"metadata": {},
"outputs": [
{
@@ -10689,45 +10733,7 @@
"/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:605: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n",
" if is_sparse(pd_dtype):\n",
"/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:614: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n",
- " if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Training Set Accuracy: 1.0\n",
- "Test Set Accuracy: 0.7\n",
- "\n",
- "Training Set Classification Report:\n",
- " precision recall f1-score support\n",
- "\n",
- " 0 1.00 1.00 1.00 36\n",
- " 1 1.00 1.00 1.00 43\n",
- "\n",
- " accuracy 1.00 79\n",
- " macro avg 1.00 1.00 1.00 79\n",
- "weighted avg 1.00 1.00 1.00 79\n",
- "\n",
- "\n",
- "Test Set Classification Report:\n",
- " precision recall f1-score support\n",
- "\n",
- " 0 0.71 0.56 0.63 9\n",
- " 1 0.69 0.82 0.75 11\n",
- "\n",
- " accuracy 0.70 20\n",
- " macro avg 0.70 0.69 0.69 20\n",
- "weighted avg 0.70 0.70 0.69 20\n",
- "\n",
- "\n",
- "Model Parameters: {'memory': None, 'steps': [('scaler', StandardScaler()), ('MLP', MLPClassifier(max_iter=300, random_state=42))], 'verbose': False, 'scaler': StandardScaler(), 'MLP': MLPClassifier(max_iter=300, random_state=42), 'scaler__copy': True, 'scaler__with_mean': True, 'scaler__with_std': True, 'MLP__activation': 'relu', 'MLP__alpha': 0.0001, 'MLP__batch_size': 'auto', 'MLP__beta_1': 0.9, 'MLP__beta_2': 0.999, 'MLP__early_stopping': False, 'MLP__epsilon': 1e-08, 'MLP__hidden_layer_sizes': (100,), 'MLP__learning_rate': 'constant', 'MLP__learning_rate_init': 0.001, 'MLP__max_fun': 15000, 'MLP__max_iter': 300, 'MLP__momentum': 0.9, 'MLP__n_iter_no_change': 10, 'MLP__nesterovs_momentum': True, 'MLP__power_t': 0.5, 'MLP__random_state': 42, 'MLP__shuffle': True, 'MLP__solver': 'adam', 'MLP__tol': 0.0001, 'MLP__validation_fraction': 0.1, 'MLP__verbose': False, 'MLP__warm_start': False}\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
+ " if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):\n",
"/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/neural_network/_multilayer_perceptron.py:686: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet.\n",
" warnings.warn(\n",
"/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:767: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n",
@@ -10815,6 +10821,38 @@
"/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:614: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n",
" if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):\n"
]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Training Set Accuracy: 1.0\n",
+ "Test Set Accuracy: 0.7\n",
+ "\n",
+ "Training Set Classification Report:\n",
+ " precision recall f1-score support\n",
+ "\n",
+ " 0 1.00 1.00 1.00 36\n",
+ " 1 1.00 1.00 1.00 43\n",
+ "\n",
+ " accuracy 1.00 79\n",
+ " macro avg 1.00 1.00 1.00 79\n",
+ "weighted avg 1.00 1.00 1.00 79\n",
+ "\n",
+ "\n",
+ "Test Set Classification Report:\n",
+ " precision recall f1-score support\n",
+ "\n",
+ " 0 0.71 0.56 0.63 9\n",
+ " 1 0.69 0.82 0.75 11\n",
+ "\n",
+ " accuracy 0.70 20\n",
+ " macro avg 0.70 0.69 0.69 20\n",
+ "weighted avg 0.70 0.70 0.69 20\n",
+ "\n",
+ "\n",
+ "Model Parameters: {'memory': None, 'steps': [('scaler', StandardScaler()), ('MLP', MLPClassifier(max_iter=300, random_state=42))], 'verbose': False, 'scaler': StandardScaler(), 'MLP': MLPClassifier(max_iter=300, random_state=42), 'scaler__copy': True, 'scaler__with_mean': True, 'scaler__with_std': True, 'MLP__activation': 'relu', 'MLP__alpha': 0.0001, 'MLP__batch_size': 'auto', 'MLP__beta_1': 0.9, 'MLP__beta_2': 0.999, 'MLP__early_stopping': False, 'MLP__epsilon': 1e-08, 'MLP__hidden_layer_sizes': (100,), 'MLP__learning_rate': 'constant', 'MLP__learning_rate_init': 0.001, 'MLP__max_fun': 15000, 'MLP__max_iter': 300, 'MLP__momentum': 0.9, 'MLP__n_iter_no_change': 10, 'MLP__nesterovs_momentum': True, 'MLP__power_t': 0.5, 'MLP__random_state': 42, 'MLP__shuffle': True, 'MLP__solver': 'adam', 'MLP__tol': 0.0001, 'MLP__validation_fraction': 0.1, 'MLP__verbose': False, 'MLP__warm_start': False}\n"
+ ]
}
],
"source": [
@@ -10853,7 +10891,7 @@
},
{
"cell_type": "code",
- "execution_count": 252,
+ "execution_count": 299,
"metadata": {},
"outputs": [
{
@@ -10879,7 +10917,13 @@
"/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:614: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n",
" if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):\n",
"/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/neural_network/_multilayer_perceptron.py:1098: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
- " y = column_or_1d(y, warn=True)\n",
+ " y = column_or_1d(y, warn=True)\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
"/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/neural_network/_multilayer_perceptron.py:686: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet.\n",
" warnings.warn(\n",
"/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:767: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n",
@@ -19003,23 +19047,7 @@
"/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:614: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n",
" if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):\n",
"/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/neural_network/_multilayer_perceptron.py:1098: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
- " y = column_or_1d(y, warn=True)\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Average Accuracy: 0.8282828282828283\n",
- "Average Precision: 0.46464646464646464\n",
- "Average Recall: 0.46464646464646464\n",
- "Average F1 Score: 0.46464646464646464\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
+ " y = column_or_1d(y, warn=True)\n",
"/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/neural_network/_multilayer_perceptron.py:686: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet.\n",
" warnings.warn(\n",
"/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:767: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n",
@@ -19105,7 +19133,23 @@
"/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:614: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n",
" if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):\n",
"/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/neural_network/_multilayer_perceptron.py:1098: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
- " y = column_or_1d(y, warn=True)\n",
+ " y = column_or_1d(y, warn=True)\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Average Accuracy: 0.8282828282828283\n",
+ "Average Precision: 0.46464646464646464\n",
+ "Average Recall: 0.46464646464646464\n",
+ "Average F1 Score: 0.46464646464646464\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
"/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/neural_network/_multilayer_perceptron.py:686: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet.\n",
" warnings.warn(\n",
"/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:767: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n",
@@ -19243,7 +19287,7 @@
},
{
"cell_type": "code",
- "execution_count": 253,
+ "execution_count": 300,
"metadata": {},
"outputs": [
{
@@ -19272,7 +19316,7 @@
" dtype='object')"
]
},
- "execution_count": 253,
+ "execution_count": 300,
"metadata": {},
"output_type": "execute_result"
}
@@ -19289,41 +19333,9 @@
},
{
"cell_type": "code",
- "execution_count": 254,
+ "execution_count": 301,
"metadata": {},
"outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Training Set Accuracy: 1.0\n",
- "Test Set Accuracy: 0.7\n",
- "\n",
- "Training Set Classification Report:\n",
- " precision recall f1-score support\n",
- "\n",
- " 0 1.00 1.00 1.00 36\n",
- " 1 1.00 1.00 1.00 43\n",
- "\n",
- " accuracy 1.00 79\n",
- " macro avg 1.00 1.00 1.00 79\n",
- "weighted avg 1.00 1.00 1.00 79\n",
- "\n",
- "\n",
- "Test Set Classification Report:\n",
- " precision recall f1-score support\n",
- "\n",
- " 0 0.71 0.56 0.63 9\n",
- " 1 0.69 0.82 0.75 11\n",
- "\n",
- " accuracy 0.70 20\n",
- " macro avg 0.70 0.69 0.69 20\n",
- "weighted avg 0.70 0.70 0.69 20\n",
- "\n",
- "\n",
- "Model Parameters: {'memory': None, 'steps': [('scaler', StandardScaler()), ('MLP', MLPClassifier(max_iter=300, random_state=42))], 'verbose': False, 'scaler': StandardScaler(), 'MLP': MLPClassifier(max_iter=300, random_state=42), 'scaler__copy': True, 'scaler__with_mean': True, 'scaler__with_std': True, 'MLP__activation': 'relu', 'MLP__alpha': 0.0001, 'MLP__batch_size': 'auto', 'MLP__beta_1': 0.9, 'MLP__beta_2': 0.999, 'MLP__early_stopping': False, 'MLP__epsilon': 1e-08, 'MLP__hidden_layer_sizes': (100,), 'MLP__learning_rate': 'constant', 'MLP__learning_rate_init': 0.001, 'MLP__max_fun': 15000, 'MLP__max_iter': 300, 'MLP__momentum': 0.9, 'MLP__n_iter_no_change': 10, 'MLP__nesterovs_momentum': True, 'MLP__power_t': 0.5, 'MLP__random_state': 42, 'MLP__shuffle': True, 'MLP__solver': 'adam', 'MLP__tol': 0.0001, 'MLP__validation_fraction': 0.1, 'MLP__verbose': False, 'MLP__warm_start': False}\n"
- ]
- },
{
"name": "stderr",
"output_type": "stream",
@@ -19383,7 +19395,45 @@
"/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:605: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n",
" if is_sparse(pd_dtype):\n",
"/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:614: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n",
- " if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):\n",
+ " if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Training Set Accuracy: 1.0\n",
+ "Test Set Accuracy: 0.7\n",
+ "\n",
+ "Training Set Classification Report:\n",
+ " precision recall f1-score support\n",
+ "\n",
+ " 0 1.00 1.00 1.00 36\n",
+ " 1 1.00 1.00 1.00 43\n",
+ "\n",
+ " accuracy 1.00 79\n",
+ " macro avg 1.00 1.00 1.00 79\n",
+ "weighted avg 1.00 1.00 1.00 79\n",
+ "\n",
+ "\n",
+ "Test Set Classification Report:\n",
+ " precision recall f1-score support\n",
+ "\n",
+ " 0 0.71 0.56 0.63 9\n",
+ " 1 0.69 0.82 0.75 11\n",
+ "\n",
+ " accuracy 0.70 20\n",
+ " macro avg 0.70 0.69 0.69 20\n",
+ "weighted avg 0.70 0.70 0.69 20\n",
+ "\n",
+ "\n",
+ "Model Parameters: {'memory': None, 'steps': [('scaler', StandardScaler()), ('MLP', MLPClassifier(max_iter=300, random_state=42))], 'verbose': False, 'scaler': StandardScaler(), 'MLP': MLPClassifier(max_iter=300, random_state=42), 'scaler__copy': True, 'scaler__with_mean': True, 'scaler__with_std': True, 'MLP__activation': 'relu', 'MLP__alpha': 0.0001, 'MLP__batch_size': 'auto', 'MLP__beta_1': 0.9, 'MLP__beta_2': 0.999, 'MLP__early_stopping': False, 'MLP__epsilon': 1e-08, 'MLP__hidden_layer_sizes': (100,), 'MLP__learning_rate': 'constant', 'MLP__learning_rate_init': 0.001, 'MLP__max_fun': 15000, 'MLP__max_iter': 300, 'MLP__momentum': 0.9, 'MLP__n_iter_no_change': 10, 'MLP__nesterovs_momentum': True, 'MLP__power_t': 0.5, 'MLP__random_state': 42, 'MLP__shuffle': True, 'MLP__solver': 'adam', 'MLP__tol': 0.0001, 'MLP__validation_fraction': 0.1, 'MLP__verbose': False, 'MLP__warm_start': False}\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
"/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:767: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n",
" if not hasattr(array, \"sparse\") and array.dtypes.apply(is_sparse).any():\n",
"/Users/ferdinand/anaconda3/lib/python3.11/site-packages/sklearn/utils/validation.py:605: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n",
--
GitLab