READMEForCode.md

e7bdccfa · Asthana, Shivanshi · c9dcc310 · e7bdccfa
Commit e7bdccfa authored 1 year ago by Asthana, Shivanshi
--- a/READMEForCode.md
+++ b/READMEForCode.md
+import xarray as xr
+import numpy as np
+import matplotlib.pyplot as plt
+from statsmodels.tsa.arima.model import ARIMA
+from sklearn.metrics import mean_squared_error
+from scipy.stats import pearsonr
+def plot_arima_forecast(order, years, time_series, future_years, label, color, original_legend_added=False):
+    model = ARIMA(time_series, order=order)
+    model_fit = model.fit()
+    forecast = model_fit.forecast(steps=len(future_years))
+    # Add legend for original data only once
+    if not original_legend_added:
+        plt.plot(years, time_series, label='Original Data', marker='o', color='green')
+        original_legend_added = True
+    plt.axvline(x=2022, color='gray', linestyle='--')  # Vertical line at the year 2022
+    plt.plot(future_years, forecast, label=label, linestyle='dashed', marker='o', color=color)
+    # Hindcasts for lead times 1, 2, 3, 4, and 5 years
+    for lead_time in [1, 2, 3, 4, 5]:
+        hindcast_years = range(1981 + lead_time, 2023)
+        hindcast = model_fit.predict(start=lead_time, end=len(years) - 1)
+        plt.plot(hindcast_years, hindcast, linestyle='dashed', marker='o', color=color)
+    # Calculate RMSE for each hindcast
+    rmse_values = []
+    for lead_time in [1, 2, 3, 4, 5]:
+        hindcast = model_fit.predict(start=lead_time, end=len(years) - 1)
+        rmse = np.sqrt(mean_squared_error(time_series[lead_time:], hindcast))
+        rmse_values.append(rmse)
+    # Calculate Pearson correlation coefficients
+    pearson_coefficients = []
+    for lead_time in [1, 2, 3, 4, 5]:
+        hindcast = model_fit.predict(start=lead_time, end=len(years) - 1)
+        pearson_coefficient, _ = pearsonr(time_series[lead_time:], hindcast)
+        pearson_coefficients.append(pearson_coefficient)
+    return rmse_values, pearson_coefficients, original_legend_added
+# Specify the range of years
+years = range(1981, 2023)
+# Initialize a list to store spatial averages
+spatial_averages = []
+# Loop through each year
+for year in years:
+    # Load the count file for the current year
+    count_data = xr.open_dataset(f'/home/u/u301871/counts/count_{year}.nc')
+    cluster_count = count_data['cluster_count'].values  # Adjust variable name
+    # Calculate the spatial average
+    spatial_average = np.mean(cluster_count)
+    # Append the result to the list
+    spatial_averages.append(spatial_average)
+    # Close the count dataset to free up resources
+    count_data.close()
+# Create a time series
+time_series = np.floor(spatial_averages)
+# Plot ARIMA forecasts with different parameters
+future_years = range(2023, 2034)
+original_legend_added = False
+rmse_1, pearson_1, original_legend_added = plot_arima_forecast(order=(20, 1, 3), years=years, time_series=time_series, future_years=future_years, label='ARIMA Forecast (20, 1, 3)', color='blue', original_legend_added=original_legend_added)
+rmse_2, pearson_2, original_legend_added = plot_arima_forecast(order=(20, 1, 1), years=years, time_series=time_series, future_years=future_years, label='ARIMA Forecast (20, 1, 1)', color='black', original_legend_added=original_legend_added)
+rmse_3, pearson_3, original_legend_added = plot_arima_forecast(order=(20, 1, 5), years=years, time_series=time_series, future_years=future_years, label='ARIMA Forecast (20, 1, 5)', color='red', original_legend_added=original_legend_added)
+rmse_4, pearson_4, original_legend_added = plot_arima_forecast(order=(20, 1, 2), years=years, time_series=time_series, future_years=future_years, label='ARIMA Forecast (20, 1, 2)', color='purple', original_legend_added=original_legend_added)
+rmse_5, pearson_5, original_legend_added = plot_arima_forecast(order=(20, 1, 4), years=years, time_series=time_series, future_years=future_years, label='ARIMA Forecast (20, 1, 4)', color='yellow', original_legend_added=original_legend_added)
+# Add legend to the first plot
+plt.legend()
+# Save the first plot as timeseries.png
+plt.savefig('timeseries.png')
+# Plot RMSE values
+lead_times = [1, 2, 3, 4, 5]
+plt.figure()
+for i, rmse_values in enumerate([rmse_1, rmse_2, rmse_3, rmse_4, rmse_5]):
+    plt.plot(lead_times, rmse_values, label=f'ARIMA Forecast (20, 1, {i + 1})', marker='o')
+plt.xlabel('Lead Time (Years)')
+plt.ylabel('RMSE')
+plt.title('RMSE of ARIMA Forecasts for Different Lead Times')
+plt.legend()
+plt.grid(True)
+# Save the second plot as rmse.png
+plt.savefig('rmse.png')
+# Plot Pearson correlation coefficients
+plt.figure()
+for i, pearson_values in enumerate([pearson_1, pearson_2, pearson_3, pearson_4, pearson_5]):
+    plt.plot(lead_times, pearson_values, label=f'ARIMA Forecast (20, 1, {i + 1})', marker='o')
+plt.xlabel('Lead Time (Years)')
+plt.ylabel('Pearson Correlation Coefficient')
+plt.title('Pearson Correlation Coefficient of ARIMA Forecasts for Different Lead Times')
+plt.legend()
+plt.grid(True)
+# Save the third plot as pearson.png
+plt.savefig('pearson.png')
+# Show the plots
+plt.show()