test changes

eb93211f · Russell, Hannah · 6fef65a9 · eb93211f
Commit eb93211f authored 2 years ago by Russell, Hannah
--- a/ipynb/Elbe Chlorophyll.ipynb
+++ b/ipynb/Elbe Chlorophyll.ipynb
@@ -14,7 +14,6 @@
   ]
  },
  {
-   "attachments": {},
   "cell_type": "markdown",
   "id": "3c79ca53",
   "metadata": {},
@@ -246,7 +245,7 @@
    "elbe_clorophyll_df_1 = glob.glob(os.path.join(cwd, 'data', 'input', 'elbe', 'chlorophyll','df_1', '*.csv'))\n",
    "elbe_clorophyll_df_1 = [pd.read_csv(file, sep = ';', encoding= 'unicode_escape') for file in elbe_clorophyll_df_1]\n",
    "elbe_clorophyll_df_1 = pd.concat(elbe_clorophyll_df_1, ignore_index=True)\n",
-    "elbe_clorophyll_df_1.head()"
+    "#elbe_clorophyll_df_1.head()"
   ]
  },
  {
@@ -473,7 +472,7 @@
    "elbe_clorophyll_df_1.drop(elbe_clorophyll_df_1[elbe_clorophyll_df_1.Messwert.str.contains('[<]', na=True)].index, inplace=True) #some columns contained string <2.0, so I dropped them for now but probably not an ideal solution\n",
    "elbe_clorophyll_df_1['Stromkilometer'] = elbe_clorophyll_df_1['Stromkilometer'].str.replace(\",\", \".\")\n",
    "elbe_clorophyll_df_1['Messwert'] = elbe_clorophyll_df_1['Messwert'].str.replace(\",\", \".\")\n",
-    "elbe_clorophyll_df_1.head()"
+    "#elbe_clorophyll_df_1.head()"
   ]
  },
  {
@@ -559,7 +558,7 @@
 ],
 "metadata": {
  "kernelspec": {
-   "display_name": "base",
+   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
@@ -573,7 +572,7 @@
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
-   "version": "3.10.9"
+   "version": "3.8.8"
  },
  "vscode": {
   "interpreter": {

 %% Cell type:code id:3d380a50 tags:
 ``` python
 import glob
 import os
 import pandas as pd
 import matplotlib.pyplot as plt
 ```
 %% Cell type:markdown id:3c79ca53 tags:
 ## pre-processing elbe Chlorophyll data
 The general aim is to create concatenable (non-2D, I guess) data frames for all estuaries, with unified column names.
 %% Cell type:code id:bdd39076 tags:
 ``` python
 # Input files are located relative to the current working directory.
 cwd = os.path.abspath(os.curdir)
 # Glob pattern matching every CSV file in data/input/elbe/chlorophyll/df_1.
 csv_pattern = os.path.join(cwd, 'data', 'input', 'elbe', 'chlorophyll', 'df_1', '*.csv')
 csv_frames = []
 for csv_path in glob.glob(csv_pattern):
     # The files are semicolon-separated and decoded with the 'unicode_escape' codec.
     csv_frames.append(pd.read_csv(csv_path, sep=';', encoding='unicode_escape'))
 # Stack all files into a single frame with a fresh, continuous index.
 elbe_clorophyll_df_1 = pd.concat(csv_frames, ignore_index=True)
-elbe_clorophyll_df_1.head()
+#elbe_clorophyll_df_1.head()
 ```
 %% Output
      'Gew?sser' 'Wasserk?rper'                          'Messstelle'  \
    0     'Elbe'   'Elbe (Ost)'  'oberhalb Elbstorf - Strom-km 589,0'
    1     'Elbe'   'Elbe (Ost)'  'oberhalb Elbstorf - Strom-km 589,0'
    2     'Elbe'   'Elbe (Ost)'  'oberhalb Elbstorf - Strom-km 589,0'
    3     'Elbe'   'Elbe (Ost)'  'oberhalb Elbstorf - Strom-km 589,0'
    4     'Elbe'   'Elbe (Ost)'  'oberhalb Elbstorf - Strom-km 589,0'
      'Stromkilometer'      'Parameter' 'Messwert' 'Einheit'  \
    0            589,0  'Chlorophyll-A'        1,5    'µg/l'
    1            589,0  'Chlorophyll-A'      < 2,0    'µg/l'
    2            589,0  'Chlorophyll-A'       11,8    'µg/l'
    3            589,0  'Chlorophyll-A'       36,3    'µg/l'
    4            589,0  'Chlorophyll-A'       76,6    'µg/l'
                    'Messwerttyp'         'Erfassungsart'  'Messwertart'  \
    0  'quantitativ nachgewiesen'  'Wasser - Gesamtprobe'  'Einzelprobe'
    1   'unter Bestimmungsgrenze'  'Wasser - Gesamtprobe'  'Einzelprobe'
    2  'quantitativ nachgewiesen'  'Wasser - Gesamtprobe'  'Einzelprobe'
    3  'quantitativ nachgewiesen'  'Wasser - Gesamtprobe'  'Einzelprobe'
    4  'quantitativ nachgewiesen'  'Wasser - Gesamtprobe'  'Einzelprobe'
                   'Messvorgang'     'Datum'  'Bezugsjahr' 'Zeit'  'Datum bis'  \
    0  'Längsprofile (Tideelbe)'  15.01.1982           NaN  14:16          NaN
    1  'Längsprofile (Tideelbe)'  11.02.1982           NaN  13:09          NaN
    2  'Längsprofile (Tideelbe)'  17.03.1982           NaN  14:40          NaN
    3  'Längsprofile (Tideelbe)'  14.04.1982           NaN  14:41          NaN
    4  'Längsprofile (Tideelbe)'  27.05.1982           NaN  14:58          NaN
       'Zeit bis'       'Status' 'Analysemethode'  'Bemerkung (Datenausgabe)'  \
    0         NaN  'freigegeben'              '-'                         NaN
    1         NaN  'freigegeben'              '-'                         NaN
    2         NaN  'freigegeben'              '-'                         NaN
    3         NaN  'freigegeben'              '-'                         NaN
    4         NaN  'freigegeben'              '-'                         NaN
       'zus?tzliche Informationen'
    0                          NaN
    1                          NaN
    2                          NaN
    3                          NaN
    4                          NaN
 %% Cell type:code id:c68f4427 tags:
 ``` python
 # Strip the single quotes that wrap the column names in the raw export.
 # "['']" is a regex character class, so regex=True is passed explicitly: newer pandas
 # versions default to literal matching, which would leave the quotes in place and break
 # the attribute access `.Messwert` on the next statement.
 elbe_clorophyll_df_1.columns = elbe_clorophyll_df_1.columns.str.replace("['']", "", regex=True)
 # Drop rows whose Messwert contains '<' (e.g. '< 2,0', reported as below the quantification
 # limit) and, because of na=True, rows with a missing Messwert as well.
 # Dropped for now; probably not an ideal solution.
 elbe_clorophyll_df_1.drop(elbe_clorophyll_df_1[elbe_clorophyll_df_1.Messwert.str.contains('[<]', na=True)].index, inplace=True)
 # Replace the decimal comma with a decimal point (literal replacement, no regex) so the
 # values can be cast to float later on.
 elbe_clorophyll_df_1['Stromkilometer'] = elbe_clorophyll_df_1['Stromkilometer'].str.replace(",", ".", regex=False)
 elbe_clorophyll_df_1['Messwert'] = elbe_clorophyll_df_1['Messwert'].str.replace(",", ".", regex=False)
-elbe_clorophyll_df_1.head()
+#elbe_clorophyll_df_1.head()
 ```
 %% Output
    C:\Users\laurins\AppData\Local\Temp\ipykernel_14088\3108878876.py:1: FutureWarning: The default value of regex will change from True to False in a future version.
      elbe_clorophyll_df_1.columns = elbe_clorophyll_df_1.columns.str.replace("['']", "")
      Gew?sser  Wasserk?rper                            Messstelle Stromkilometer  \
    0   'Elbe'  'Elbe (Ost)'  'oberhalb Elbstorf - Strom-km 589,0'          589.0
    2   'Elbe'  'Elbe (Ost)'  'oberhalb Elbstorf - Strom-km 589,0'          589.0
    3   'Elbe'  'Elbe (Ost)'  'oberhalb Elbstorf - Strom-km 589,0'          589.0
    4   'Elbe'  'Elbe (Ost)'  'oberhalb Elbstorf - Strom-km 589,0'          589.0
    5   'Elbe'  'Elbe (Ost)'  'oberhalb Elbstorf - Strom-km 589,0'          589.0
             Parameter Messwert Einheit                 Messwerttyp  \
    0  'Chlorophyll-A'      1.5  'µg/l'  'quantitativ nachgewiesen'
    2  'Chlorophyll-A'     11.8  'µg/l'  'quantitativ nachgewiesen'
    3  'Chlorophyll-A'     36.3  'µg/l'  'quantitativ nachgewiesen'
    4  'Chlorophyll-A'     76.6  'µg/l'  'quantitativ nachgewiesen'
    5  'Chlorophyll-A'    185.7  'µg/l'  'quantitativ nachgewiesen'
                Erfassungsart    Messwertart                Messvorgang  \
    0  'Wasser - Gesamtprobe'  'Einzelprobe'  'Längsprofile (Tideelbe)'
    2  'Wasser - Gesamtprobe'  'Einzelprobe'  'Längsprofile (Tideelbe)'
    3  'Wasser - Gesamtprobe'  'Einzelprobe'  'Längsprofile (Tideelbe)'
    4  'Wasser - Gesamtprobe'  'Einzelprobe'  'Längsprofile (Tideelbe)'
    5  'Wasser - Gesamtprobe'  'Einzelprobe'  'Längsprofile (Tideelbe)'
            Datum  Bezugsjahr   Zeit  Datum bis  Zeit bis         Status  \
    0  15.01.1982         NaN  14:16        NaN       NaN  'freigegeben'
    2  17.03.1982         NaN  14:40        NaN       NaN  'freigegeben'
    3  14.04.1982         NaN  14:41        NaN       NaN  'freigegeben'
    4  27.05.1982         NaN  14:58        NaN       NaN  'freigegeben'
    5  24.06.1982         NaN  13:24        NaN       NaN  'freigegeben'
      Analysemethode  Bemerkung (Datenausgabe)  zus?tzliche Informationen
    0            '-'                       NaN                        NaN
    2            '-'                       NaN                        NaN
    3            '-'                       NaN                        NaN
    4            '-'                       NaN                        NaN
    5            '-'                       NaN                        NaN
 %% Cell type:code id:f98cec41 tags:
 ``` python
 # Cast the cleaned string columns to float so they can be plotted numerically.
 Stromkilometer = elbe_clorophyll_df_1['Stromkilometer'].astype(float)
 Messwert = elbe_clorophyll_df_1['Messwert'].astype(float)
 # Scatter plot of all chlorophyll values from all years in a single figure.
 plt.scatter(Stromkilometer, Messwert)
 # Reverse the x-axis so that larger kilometre values appear on the left.
 plt.gca().invert_xaxis()
 plt.title('Elbe-- All Years')
 plt.xlabel('Kilometer')
 plt.ylabel('Chlorophyll ug/L')
 ```
 %% Output
    Text(0, 0.5, 'Chlorophyll ug/L')
 %% Cell type:code id:6b548829 tags:
 ``` python
 # Median chlorophyll value per river kilometre.
 # Both columns still hold strings here (only the decimal separator was replaced), so they
 # are cast to float first: otherwise the group keys sort lexicographically and numeric
 # aggregations have to rely on implicit string-to-number coercion.
 chlor_avg = elbe_clorophyll_df_1[['Stromkilometer', 'Messwert']].astype(float)
 chlor_avg = chlor_avg.groupby('Stromkilometer').median()
 chlor_avg.plot()
 ```
 %% Output
    <Axes: xlabel='Stromkilometer'>
 %% Cell type:code id:d5b7a017 tags:
 ``` python
 ```