-
- Downloads
RMSE and ARIMA forecast code
-
`import xarray as xr import numpy as np import matplotlib.pyplot as plt from statsmodels.tsa.arima.model import ARIMA from sklearn.metrics import mean_squared_error from scipy.stats import pearsonr
def plot_arima_forecast(order, years, time_series, future_years, label, color, original_legend_added=False): model = ARIMA(time_series, order=order) model_fit = model.fit() forecast = model_fit.forecast(steps=len(future_years))
# Add legend for original data only once if not original_legend_added: plt.plot(years, time_series, label='Original Data', marker='o', color='green') original_legend_added = True plt.axvline(x=2022, color='gray', linestyle='--') # Vertical line at the year 2022 plt.plot(future_years, forecast, label=label, linestyle='dashed', marker='o', color=color) # Hindcasts for lead times 1, 2, 3, 4, and 5 years for lead_time in [1, 2, 3, 4, 5]: hindcast_years = range(1981 + lead_time, 2023) hindcast = model_fit.predict(start=lead_time, end=len(years) - 1) plt.plot(hindcast_years, hindcast, linestyle='dashed', marker='o', color=color) # Calculate RMSE for each hindcast rmse_values = [] for lead_time in [1, 2, 3, 4, 5]: hindcast = model_fit.predict(start=lead_time, end=len(years) - 1) rmse = np.sqrt(mean_squared_error(time_series[lead_time:], hindcast)) rmse_values.append(rmse) # Calculate Pearson correlation coefficients pearson_coefficients = [] for lead_time in [1, 2, 3, 4, 5]: hindcast = model_fit.predict(start=lead_time, end=len(years) - 1) pearson_coefficient, _ = pearsonr(time_series[lead_time:], hindcast) pearson_coefficients.append(pearson_coefficient) return rmse_values, pearson_coefficients, original_legend_added
Specify the range of years
years = range(1981, 2023)
Initialize a list to store spatial averages
spatial_averages = []
Loop through each year
for year in years: # Load the count file for the current year count_data = xr.open_dataset(f'/home/u/u301871/counts/count_{year}.nc') cluster_count = count_data['cluster_count'].values # Adjust variable name
# Calculate the spatial average spatial_average = np.mean(cluster_count) # Append the result to the list spatial_averages.append(spatial_average) # Close the count dataset to free up resources count_data.close()
Create a time series
time_series = np.floor(spatial_averages)
Plot ARIMA forecasts with different parameters
future_years = range(2023, 2034) original_legend_added = False rmse_1, pearson_1, original_legend_added = plot_arima_forecast(order=(20, 1, 3), years=years, time_series=time_series, future_years=future_years, label='ARIMA Forecast (20, 1, 3)', color='blue', original_legend_added=original_legend_added) rmse_2, pearson_2, original_legend_added = plot_arima_forecast(order=(20, 1, 1), years=years, time_series=time_series, future_years=future_years, label='ARIMA Forecast (20, 1, 1)', color='black', original_legend_added=original_legend_added) rmse_3, pearson_3, original_legend_added = plot_arima_forecast(order=(20, 1, 5), years=years, time_series=time_series, future_years=future_years, label='ARIMA Forecast (20, 1, 5)', color='red', original_legend_added=original_legend_added) rmse_4, pearson_4, original_legend_added = plot_arima_forecast(order=(20, 1, 2), years=years, time_series=time_series, future_years=future_years, label='ARIMA Forecast (20, 1, 2)', color='purple', original_legend_added=original_legend_added) rmse_5, pearson_5, original_legend_added = plot_arima_forecast(order=(20, 1, 4), years=years, time_series=time_series, future_years=future_years, label='ARIMA Forecast (20, 1, 4)', color='yellow', original_legend_added=original_legend_added)
Add legend to the first plot
plt.legend()
Save the first plot as timeseries.png
plt.savefig('timeseries.png')
Plot RMSE values
lead_times = [1, 2, 3, 4, 5] plt.figure() for i, rmse_values in enumerate([rmse_1, rmse_2, rmse_3, rmse_4, rmse_5]): plt.plot(lead_times, rmse_values, label=f'ARIMA Forecast (20, 1, {i + 1})', marker='o')
plt.xlabel('Lead Time (Years)') plt.ylabel('RMSE') plt.title('RMSE of ARIMA Forecasts for Different Lead Times') plt.legend() plt.grid(True)
Save the second plot as rmse.png
plt.savefig('rmse.png')
Plot Pearson correlation coefficients
plt.figure() for i, pearson_values in enumerate([pearson_1, pearson_2, pearson_3, pearson_4, pearson_5]): plt.plot(lead_times, pearson_values, label=f'ARIMA Forecast (20, 1, {i + 1})', marker='o')
plt.xlabel('Lead Time (Years)') plt.ylabel('Pearson Correlation Coefficient') plt.title('Pearson Correlation Coefficient of ARIMA Forecasts for Different Lead Times') plt.legend() plt.grid(True)
Save the third plot as pearson.png
plt.savefig('pearson.png')
Show the plots
plt.show() `
Edited by Asthana, Shivanshi