diff --git a/Code_TimeSeriesPlots_MethodII b/Code_TimeSeriesPlots_MethodII new file mode 100644 index 0000000000000000000000000000000000000000..8e7536b499256bc24c5fe961c4bf147e99286474 --- /dev/null +++ b/Code_TimeSeriesPlots_MethodII @@ -0,0 +1,133 @@ +import xarray as xr +import numpy as np +import matplotlib.pyplot as plt +from statsmodels.tsa.arima.model import ARIMA +from sklearn.metrics import mean_squared_error +from scipy.stats import pearsonr + +def plot_arima_forecast(order, years, time_series, future_years, label, color, original_legend_added=False): + model = ARIMA(time_series, order=order) + model_fit = model.fit() + forecast = model_fit.forecast(steps=len(future_years)) + + # Add legend for original data only once + if not original_legend_added: + plt.plot(years, time_series, label='Original Data', marker='o', color='green') + original_legend_added = True + + plt.axvline(x=2022, color='gray', linestyle='--') # Vertical line at the year 2022 + plt.plot(future_years, forecast, label=label, linestyle='dashed', marker='o', color=color) + + # Hindcasts for lead times 1, 2, 3, 4, and 5 years + for lead_time in [1, 2, 3, 4, 5]: + hindcast_years = range(1981 + lead_time, 2023) + hindcast = model_fit.predict(start=lead_time, end=len(years) - 1) + plt.plot(hindcast_years, hindcast, linestyle='dashed', marker='o', color=color) + + # Calculate RMSE, Pearson correlation coefficient, and R-squared for each lead time + rmse_values = [] + pearson_coefficients = [] + r_squared_values = [] + for lead_time in [1, 2, 3, 4, 5]: + hindcast = model_fit.predict(start=lead_time, end=len(years) - 1) + + # Calculate RMSE + rmse = np.sqrt(mean_squared_error(time_series[lead_time:], hindcast)) + rmse_values.append(rmse) + + # Calculate Pearson correlation coefficient + pearson_coefficient, _ = pearsonr(time_series[lead_time:], hindcast) + pearson_coefficients.append(pearson_coefficient) + + # Calculate R-squared + ss_total = np.sum((time_series[lead_time:] - np.mean(time_series[lead_time:]))**2) + ss_residual = np.sum((time_series[lead_time:] - hindcast)**2) + r_squared = 1 - (ss_residual / ss_total) + r_squared_values.append(r_squared) + + return rmse_values, pearson_coefficients, r_squared_values, original_legend_added + +# Specify the range of years +years = range(1981, 2023) + +# Initialize a list to store spatial averages +spatial_averages = [] + +# Loop through each year +for year in years: + # Load the count file for the current year + count_data = xr.open_dataset(f'/home/u/u301871/counts/count_{year}.nc') + cluster_count = count_data['cluster_count'].values # Adjust variable name + + # Calculate the spatial average + spatial_average = np.mean(cluster_count) + + # Append the result to the list + spatial_averages.append(spatial_average) + + # Close the count dataset to free up resources + count_data.close() + +# Create a time series +time_series = np.floor(spatial_averages) + +# Plot ARIMA forecasts with different parameters +future_years = range(2023, 2034) +original_legend_added = False +rmse_1, pearson_1, r_squared_1, original_legend_added = plot_arima_forecast(order=(20, 1, 3), years=years, time_series=time_series, future_years=future_years, label='ARIMA Forecast (20, 1, 3)', color='blue', original_legend_added=original_legend_added) +rmse_2, pearson_2, r_squared_2, original_legend_added = plot_arima_forecast(order=(20, 1, 1), years=years, time_series=time_series, future_years=future_years, label='ARIMA Forecast (20, 1, 1)', color='black', original_legend_added=original_legend_added) +rmse_3, pearson_3, r_squared_3, original_legend_added = plot_arima_forecast(order=(20, 1, 5), years=years, time_series=time_series, future_years=future_years, label='ARIMA Forecast (20, 1, 5)', color='red', original_legend_added=original_legend_added) +rmse_4, pearson_4, r_squared_4, original_legend_added = plot_arima_forecast(order=(20, 1, 2), years=years, time_series=time_series, future_years=future_years, label='ARIMA Forecast (20, 1, 2)', color='purple', original_legend_added=original_legend_added) +rmse_5, pearson_5, r_squared_5, original_legend_added = plot_arima_forecast(order=(20, 1, 4), years=years, time_series=time_series, future_years=future_years, label='ARIMA Forecast (20, 1, 4)', color='yellow', original_legend_added=original_legend_added) + +# Add legend to the first plot +plt.legend() + +# Save the first plot as timeseries.png +plt.savefig('timeseries.png') + +# Plot RMSE values +lead_times = [1, 2, 3, 4, 5] +plt.figure() +for i, rmse_values in enumerate([rmse_1, rmse_2, rmse_3, rmse_4, rmse_5]): + plt.plot(lead_times, rmse_values, label=f'ARIMA Forecast (20, 1, {i + 1})', marker='o') + +plt.xlabel('Lead Time (Years)') +plt.ylabel('RMSE') +plt.title('RMSE of ARIMA Forecasts for Different Lead Times') +plt.legend() +plt.grid(True) + +# Save the second plot as rmse.png +plt.savefig('rmse.png') + +# Plot Pearson correlation coefficients +plt.figure() +for i, pearson_values in enumerate([pearson_1, pearson_2, pearson_3, pearson_4, pearson_5]): + plt.plot(lead_times, pearson_values, label=f'ARIMA Forecast (20, 1, {i + 1})', marker='o') + +plt.xlabel('Lead Time (Years)') +plt.ylabel('Pearson Correlation Coefficient') +plt.title('Pearson Correlation Coefficient of ARIMA Forecasts for Different Lead Times') +plt.legend() +plt.grid(True) + +# Save the third plot as pearson.png +plt.savefig('pearson.png') + +# Plot R-squared values +plt.figure() +for i, r_squared_values in enumerate([r_squared_1, r_squared_2, r_squared_3, r_squared_4, r_squared_5]): + plt.plot(lead_times, r_squared_values, label=f'ARIMA Forecast (20, 1, {i + 1})', marker='o') + +plt.xlabel('Lead Time (Years)') +plt.ylabel('R-squared') +plt.title('R-squared of ARIMA Forecasts for Different Lead Times') +plt.legend() +plt.grid(True) + +# Save the fourth plot as rsquared.png +plt.savefig('rsquared.png') + +# Show the updated plots +plt.show()