"""Convert per-class FITS spectra into NumPy arrays.

``fits_path`` is expected to contain one sub-directory per class.  Every file
inside such a directory is opened as a FITS file whose HDU 1 table provides
the columns ``flux`` and ``loglam``.  Each spectrum is cropped to the
configured wavelength window; spectra whose cropped length equals
``expected_length`` are collected (at most ``samples_per_class`` per class)
and written to ``data.npy``, ``labels.npy``, ``wavelengths.npy`` and
``filenames.npy`` inside ``fits_path``.
"""
import os

import numpy as np

########## Input ##########

fits_path = 'F:\\data\\spectral_fits\\'

samples_per_class = 1000

smallest_wavelength = 4000  # in Angstrom
biggest_wavelength = 9000

# Number of samples a cropped spectrum must have to be accepted.
expected_length = 3522

########## Program ##########


def first_index_above(values, threshold):
    """Return the index of the first element of *values* greater than *threshold*.

    Returns ``len(values)`` when no element exceeds the threshold, so the
    result is always usable as a slice bound.
    """
    hits = np.flatnonzero(np.asarray(values) > threshold)
    return int(hits[0]) if hits.size else len(values)


def crop_to_range(wavelength, flux, lower, upper):
    """Cut a spectrum to the window between *lower* and *upper*.

    The slice starts at the first wavelength greater than *lower* and stops
    just before the first wavelength greater than *upper*.  A spectrum that
    never exceeds a bound is sliced up to its end instead of reusing an index
    left over from a previously processed spectrum.

    Returns:
        Tuple ``(wavelength, flux)`` holding the cropped arrays.
    """
    wavelength = np.asarray(wavelength)
    flux = np.asarray(flux)
    first_index = first_index_above(wavelength, lower)
    last_index = first_index_above(wavelength, upper)
    return wavelength[first_index:last_index], flux[first_index:last_index]


def load_spectrum(path):
    """Read wavelength and flux from the HDU 1 table of the FITS file at *path*.

    The wavelength is computed as ``10 ** loglam``.

    Returns:
        Tuple ``(wavelength, flux)`` of NumPy arrays.
    """
    # Imported here so the pure helpers above stay importable without astropy.
    from astropy.io import fits

    # The context manager closes the file even if reading a column fails.
    with fits.open(path) as hdul:
        table = hdul[1].data
        # Copy so the values do not depend on the file staying open.
        flux = np.array(table['flux'])
        wavelength = 10 ** table['loglam']
    return wavelength, flux


def build_dataset(root, per_class, lower, upper, length):
    """Collect the cropped spectra of every class directory below *root*.

    Args:
        root: Directory containing one sub-directory per class.
        per_class: Maximum number of spectra kept per class.
        lower: Lower wavelength bound of the crop window.
        upper: Upper wavelength bound of the crop window.
        length: Required number of samples of a cropped spectrum.

    Returns:
        Tuple ``(data, labels, wavelengths, filenames)``.  ``labels[k]`` is
        the position of the class directory (sorted case-insensitively) that
        ``data[k]`` was read from.

    Raises:
        RuntimeError: If not a single spectrum was accepted.
    """
    all_flux = []
    all_labels = []
    filenames = []
    wavelengths = None

    # Only directories are classes: the .npy results are saved into *root*
    # as well and must not be treated as a class on a second run.  Sorting
    # makes the label assignment independent of the listing order.
    class_dirs = sorted(
        (entry for entry in os.listdir(root)
         if os.path.isdir(os.path.join(root, entry))),
        key=str.lower,
    )

    for label, directory in enumerate(class_dirs):
        class_path = os.path.join(root, directory)

        count_failed = 0
        count_added = 0
        count_seen = 0

        for filename in sorted(os.listdir(class_path), key=str.lower):
            # Open the FITS file and read wavelength + flux
            wavelength, flux = load_spectrum(os.path.join(class_path, filename))

            # Cut wavelength and flux to the configured window
            wavelength, flux = crop_to_range(wavelength, flux, lower, upper)
            count_seen += 1

            if len(wavelength) != length:
                print("Länge der Liste wavelength ist: " + str(len(wavelength)))
                count_failed += 1
                continue

            if count_added < per_class:
                if wavelengths is None:
                    # Keep the grid of the first accepted spectrum; it is
                    # guaranteed to have the accepted length.
                    # NOTE(review): assumes all accepted spectra share this
                    # grid closely enough - confirm.
                    wavelengths = wavelength
                all_flux.append(flux)
                # Record the label together with the sample so both stay
                # aligned even if a class yields fewer than per_class spectra.
                all_labels.append(label)
                filenames.append(filename)
                count_added += 1

        # Relate the failures to the files actually inspected.
        percent_failed = 100 * count_failed / count_seen if count_seen else 0.0
        print(f"{percent_failed:.1f}% waren nicht erfolgreich bei der Klasse:{directory}")

    if not all_flux:
        raise RuntimeError("no spectrum with the expected length found below " + str(root))

    # Fill the NumPy arrays with the collected data
    return (
        np.array(all_flux),
        np.array(all_labels, dtype='int'),
        np.array(wavelengths),
        np.array(filenames),
    )


if __name__ == "__main__":
    data, labels, wavelengths, filenames = build_dataset(
        fits_path,
        samples_per_class,
        smallest_wavelength,
        biggest_wavelength,
        expected_length,
    )

    # Save the NumPy arrays as .npy files
    np.save(os.path.join(fits_path, "data.npy"), data)
    np.save(os.path.join(fits_path, "labels.npy"), labels)
    np.save(os.path.join(fits_path, "wavelengths.npy"), wavelengths)
    np.save(os.path.join(fits_path, "filenames.npy"), filenames)