Code Method II with Time Series

714a0d46 · Asthana, Shivanshi · c9e6fcba · 714a0d46
Commit 714a0d46 authored 1 year ago by Asthana, Shivanshi
--- a/Code_TimeSeriesPlots_MethodII
+++ b/Code_TimeSeriesPlots_MethodII
+import xarray as xr
+import numpy as np
+import matplotlib.pyplot as plt
+from statsmodels.tsa.arima.model import ARIMA
+from sklearn.metrics import mean_squared_error
+from scipy.stats import pearsonr
+
+def plot_arima_forecast(order, years, time_series, future_years, label, color, original_legend_added=False):
+    model = ARIMA(time_series, order=order)
+    model_fit = model.fit()
+    forecast = model_fit.forecast(steps=len(future_years))
+    
+    # Add legend for original data only once
+    if not original_legend_added:
+        plt.plot(years, time_series, label='Original Data', marker='o', color='green')
+        original_legend_added = True
+
+    plt.axvline(x=2022, color='gray', linestyle='--')  # Vertical line at the year 2022
+    plt.plot(future_years, forecast, label=label, linestyle='dashed', marker='o', color=color)
+
+    # Hindcasts for lead times 1, 2, 3, 4, and 5 years
+    for lead_time in [1, 2, 3, 4, 5]:
+        hindcast_years = range(1981 + lead_time, 2023)
+        hindcast = model_fit.predict(start=lead_time, end=len(years) - 1)
+        plt.plot(hindcast_years, hindcast, linestyle='dashed', marker='o', color=color)
+
+    # Calculate RMSE, Pearson correlation coefficient, and R-squared for each lead time
+    rmse_values = []
+    pearson_coefficients = []
+    r_squared_values = []
+    for lead_time in [1, 2, 3, 4, 5]:
+        hindcast = model_fit.predict(start=lead_time, end=len(years) - 1)
+        
+        # Calculate RMSE
+        rmse = np.sqrt(mean_squared_error(time_series[lead_time:], hindcast))
+        rmse_values.append(rmse)
+
+        # Calculate Pearson correlation coefficient
+        pearson_coefficient, _ = pearsonr(time_series[lead_time:], hindcast)
+        pearson_coefficients.append(pearson_coefficient)
+
+        # Calculate R-squared
+        ss_total = np.sum((time_series[lead_time:] - np.mean(time_series[lead_time:]))**2)
+        ss_residual = np.sum((time_series[lead_time:] - hindcast)**2)
+        r_squared = 1 - (ss_residual / ss_total)
+        r_squared_values.append(r_squared)
+
+    return rmse_values, pearson_coefficients, r_squared_values, original_legend_added
+
+# Specify the range of years
+years = range(1981, 2023)
+
+# Initialize a list to store spatial averages
+spatial_averages = []
+
+# Loop through each year
+for year in years:
+    # Load the count file for the current year
+    count_data = xr.open_dataset(f'/home/u/u301871/counts/count_{year}.nc')
+    cluster_count = count_data['cluster_count'].values  # Adjust variable name
+
+    # Calculate the spatial average
+    spatial_average = np.mean(cluster_count)
+
+    # Append the result to the list
+    spatial_averages.append(spatial_average)
+
+    # Close the count dataset to free up resources
+    count_data.close()
+
+# Create a time series
+time_series = np.floor(spatial_averages)
+
+# Plot ARIMA forecasts with different parameters
+future_years = range(2023, 2034)
+original_legend_added = False
+rmse_1, pearson_1, r_squared_1, original_legend_added = plot_arima_forecast(order=(20, 1, 3), years=years, time_series=time_series, future_years=future_years, label='ARIMA Forecast (20, 1, 3)', color='blue', original_legend_added=original_legend_added)
+rmse_2, pearson_2, r_squared_2, original_legend_added = plot_arima_forecast(order=(20, 1, 1), years=years, time_series=time_series, future_years=future_years, label='ARIMA Forecast (20, 1, 1)', color='black', original_legend_added=original_legend_added)
+rmse_3, pearson_3, r_squared_3, original_legend_added = plot_arima_forecast(order=(20, 1, 5), years=years, time_series=time_series, future_years=future_years, label='ARIMA Forecast (20, 1, 5)', color='red', original_legend_added=original_legend_added)
+rmse_4, pearson_4, r_squared_4, original_legend_added = plot_arima_forecast(order=(20, 1, 2), years=years, time_series=time_series, future_years=future_years, label='ARIMA Forecast (20, 1, 2)', color='purple', original_legend_added=original_legend_added)
+rmse_5, pearson_5, r_squared_5, original_legend_added = plot_arima_forecast(order=(20, 1, 4), years=years, time_series=time_series, future_years=future_years, label='ARIMA Forecast (20, 1, 4)', color='yellow', original_legend_added=original_legend_added)
+
+# Add legend to the first plot
+plt.legend()
+
+# Save the first plot as timeseries.png
+plt.savefig('timeseries.png')
+
+# Plot RMSE values
+lead_times = [1, 2, 3, 4, 5]
+plt.figure()
+for i, rmse_values in enumerate([rmse_1, rmse_2, rmse_3, rmse_4, rmse_5]):
+    plt.plot(lead_times, rmse_values, label=f'ARIMA Forecast (20, 1, {i + 1})', marker='o')
+
+plt.xlabel('Lead Time (Years)')
+plt.ylabel('RMSE')
+plt.title('RMSE of ARIMA Forecasts for Different Lead Times')
+plt.legend()
+plt.grid(True)
+
+# Save the second plot as rmse.png
+plt.savefig('rmse.png')
+
+# Plot Pearson correlation coefficients
+plt.figure()
+for i, pearson_values in enumerate([pearson_1, pearson_2, pearson_3, pearson_4, pearson_5]):
+    plt.plot(lead_times, pearson_values, label=f'ARIMA Forecast (20, 1, {i + 1})', marker='o')
+
+plt.xlabel('Lead Time (Years)')
+plt.ylabel('Pearson Correlation Coefficient')
+plt.title('Pearson Correlation Coefficient of ARIMA Forecasts for Different Lead Times')
+plt.legend()
+plt.grid(True)
+
+# Save the third plot as pearson.png
+plt.savefig('pearson.png')
+
+# Plot R-squared values
+plt.figure()
+for i, r_squared_values in enumerate([r_squared_1, r_squared_2, r_squared_3, r_squared_4, r_squared_5]):
+    plt.plot(lead_times, r_squared_values, label=f'ARIMA Forecast (20, 1, {i + 1})', marker='o')
+
+plt.xlabel('Lead Time (Years)')
+plt.ylabel('R-squared')
+plt.title('R-squared of ARIMA Forecasts for Different Lead Times')
+plt.legend()
+plt.grid(True)
+
+# Save the fourth plot as rsquared.png
+plt.savefig('rsquared.png')
+
+# Show the updated plots
+plt.show()