Skip to content
Snippets Groups Projects
Commit f2da286e authored by Asthana, Shivanshi's avatar Asthana, Shivanshi :speech_balloon:
Browse files

RMSE and ARIMA forecast code

parent 6ccf0f13
No related branches found
No related tags found
No related merge requests found
import xarray as xr
import numpy as np
import matplotlib.pyplot as plt
from statsmodels.tsa.arima.model import ARIMA
from sklearn.metrics import mean_squared_error
def plot_arima_forecast(order, years, time_series, future_years, label, color, original_legend_added=False):
model = ARIMA(time_series, order=order)
model_fit = model.fit()
forecast = model_fit.forecast(steps=len(future_years))
# Add legend for original data only once
if not original_legend_added:
plt.plot(years, time_series, label='Original Data', marker='o', color='green')
original_legend_added = True
plt.axvline(x=2022, color='gray', linestyle='--') # Vertical line at the year 2022
plt.plot(future_years, forecast, label=label, linestyle='dashed', marker='o', color=color)
# Hindcasts for lead times 1, 2, 3, 4, and 5 years
for lead_time in [1, 2, 3, 4, 5]:
hindcast_years = range(1981 + lead_time, 2023)
hindcast = model_fit.predict(start=lead_time, end=len(years) - 1)
plt.plot(hindcast_years, hindcast, linestyle='dashed', marker='o', color=color)
# Calculate RMSE for each hindcast
rmse_values = []
for lead_time in [1, 2, 3, 4, 5]:
hindcast = model_fit.predict(start=lead_time, end=len(years) - 1)
rmse = np.sqrt(mean_squared_error(time_series[lead_time:], hindcast))
rmse_values.append(rmse)
return rmse_values, original_legend_added
# Specify the range of years
years = range(1981, 2023)
# Initialize a list to store spatial averages
spatial_averages = []
# Loop through each year
for year in years:
# Load the count file for the current year
count_data = xr.open_dataset(f'/home/u/u301871/counts/count_{year}.nc')
cluster_count = count_data['cluster_count'].values # Adjust variable name
# Calculate the spatial average
spatial_average = np.mean(cluster_count)
# Append the result to the list
spatial_averages.append(spatial_average)
# Close the count dataset to free up resources
count_data.close()
# Create a time series
time_series = np.floor(spatial_averages)
# Plot ARIMA forecasts with different parameters
future_years = range(2023, 2034)
original_legend_added = False
rmse_1, original_legend_added = plot_arima_forecast(order=(20, 1, 3), years=years, time_series=time_series, future_years=future_years, label='ARIMA Forecast (20, 1, 3)', color='blue', original_legend_added=original_legend_added)
rmse_2, original_legend_added = plot_arima_forecast(order=(20, 1, 1), years=years, time_series=time_series, future_years=future_years, label='ARIMA Forecast (20, 1, 1)', color='black', original_legend_added=original_legend_added)
rmse_3, original_legend_added = plot_arima_forecast(order=(20, 1, 5), years=years, time_series=time_series, future_years=future_years, label='ARIMA Forecast (20, 1, 5)', color='red', original_legend_added=original_legend_added)
rmse_4, original_legend_added = plot_arima_forecast(order=(20, 1, 2), years=years, time_series=time_series, future_years=future_years, label='ARIMA Forecast (20, 1, 2)', color='purple', original_legend_added=original_legend_added)
rmse_5, original_legend_added = plot_arima_forecast(order=(20, 1, 4), years=years, time_series=time_series, future_years=future_years, label='ARIMA Forecast (20, 1, 4)', color='yellow', original_legend_added=original_legend_added)
# Add legend to the first plot
plt.legend()
# Save the first plot as timeseries.png
plt.savefig('timeseries.png')
# Plot RMSE values
lead_times = [1, 2, 3, 4, 5]
plt.figure()
for i, rmse_values in enumerate([rmse_1, rmse_2, rmse_3, rmse_4, rmse_5]):
plt.plot(lead_times, rmse_values, label=f'ARIMA Forecast (20, 1, {i + 1})', marker='o')
plt.xlabel('Lead Time (Years)')
plt.ylabel('RMSE')
plt.title('RMSE of ARIMA Forecasts for Different Lead Times')
plt.legend()
plt.grid(True)
# Save the second plot as rmse.png
plt.savefig('rmse.png')
# Show the plots
plt.show()
  • Author Owner

    `import xarray as xr import numpy as np import matplotlib.pyplot as plt from statsmodels.tsa.arima.model import ARIMA from sklearn.metrics import mean_squared_error from scipy.stats import pearsonr

    def plot_arima_forecast(order, years, time_series, future_years, label, color, original_legend_added=False): model = ARIMA(time_series, order=order) model_fit = model.fit() forecast = model_fit.forecast(steps=len(future_years))

    # Add legend for original data only once
    if not original_legend_added:
        plt.plot(years, time_series, label='Original Data', marker='o', color='green')
        original_legend_added = True
    
    plt.axvline(x=2022, color='gray', linestyle='--')  # Vertical line at the year 2022
    plt.plot(future_years, forecast, label=label, linestyle='dashed', marker='o', color=color)
    
    # Hindcasts for lead times 1, 2, 3, 4, and 5 years
    for lead_time in [1, 2, 3, 4, 5]:
        hindcast_years = range(1981 + lead_time, 2023)
        hindcast = model_fit.predict(start=lead_time, end=len(years) - 1)
        plt.plot(hindcast_years, hindcast, linestyle='dashed', marker='o', color=color)
    
    # Calculate RMSE for each hindcast
    rmse_values = []
    for lead_time in [1, 2, 3, 4, 5]:
        hindcast = model_fit.predict(start=lead_time, end=len(years) - 1)
        rmse = np.sqrt(mean_squared_error(time_series[lead_time:], hindcast))
        rmse_values.append(rmse)
    
    # Calculate Pearson correlation coefficients
    pearson_coefficients = []
    for lead_time in [1, 2, 3, 4, 5]:
        hindcast = model_fit.predict(start=lead_time, end=len(years) - 1)
        pearson_coefficient, _ = pearsonr(time_series[lead_time:], hindcast)
        pearson_coefficients.append(pearson_coefficient)
    
    return rmse_values, pearson_coefficients, original_legend_added

    Specify the range of years

    years = range(1981, 2023)

    Initialize a list to store spatial averages

    spatial_averages = []

    Loop through each year

    for year in years: # Load the count file for the current year count_data = xr.open_dataset(f'/home/u/u301871/counts/count_{year}.nc') cluster_count = count_data['cluster_count'].values # Adjust variable name

    # Calculate the spatial average
    spatial_average = np.mean(cluster_count)
    
    # Append the result to the list
    spatial_averages.append(spatial_average)
    
    # Close the count dataset to free up resources
    count_data.close()

    Create a time series

    time_series = np.floor(spatial_averages)

    Plot ARIMA forecasts with different parameters

    future_years = range(2023, 2034) original_legend_added = False rmse_1, pearson_1, original_legend_added = plot_arima_forecast(order=(20, 1, 3), years=years, time_series=time_series, future_years=future_years, label='ARIMA Forecast (20, 1, 3)', color='blue', original_legend_added=original_legend_added) rmse_2, pearson_2, original_legend_added = plot_arima_forecast(order=(20, 1, 1), years=years, time_series=time_series, future_years=future_years, label='ARIMA Forecast (20, 1, 1)', color='black', original_legend_added=original_legend_added) rmse_3, pearson_3, original_legend_added = plot_arima_forecast(order=(20, 1, 5), years=years, time_series=time_series, future_years=future_years, label='ARIMA Forecast (20, 1, 5)', color='red', original_legend_added=original_legend_added) rmse_4, pearson_4, original_legend_added = plot_arima_forecast(order=(20, 1, 2), years=years, time_series=time_series, future_years=future_years, label='ARIMA Forecast (20, 1, 2)', color='purple', original_legend_added=original_legend_added) rmse_5, pearson_5, original_legend_added = plot_arima_forecast(order=(20, 1, 4), years=years, time_series=time_series, future_years=future_years, label='ARIMA Forecast (20, 1, 4)', color='yellow', original_legend_added=original_legend_added)

    Add legend to the first plot

    plt.legend()

    Save the first plot as timeseries.png

    plt.savefig('timeseries.png')

    Plot RMSE values

    lead_times = [1, 2, 3, 4, 5] plt.figure() for i, rmse_values in enumerate([rmse_1, rmse_2, rmse_3, rmse_4, rmse_5]): plt.plot(lead_times, rmse_values, label=f'ARIMA Forecast (20, 1, {i + 1})', marker='o')

    plt.xlabel('Lead Time (Years)') plt.ylabel('RMSE') plt.title('RMSE of ARIMA Forecasts for Different Lead Times') plt.legend() plt.grid(True)

    Save the second plot as rmse.png

    plt.savefig('rmse.png')

    Plot Pearson correlation coefficients

    plt.figure() for i, pearson_values in enumerate([pearson_1, pearson_2, pearson_3, pearson_4, pearson_5]): plt.plot(lead_times, pearson_values, label=f'ARIMA Forecast (20, 1, {i + 1})', marker='o')

    plt.xlabel('Lead Time (Years)') plt.ylabel('Pearson Correlation Coefficient') plt.title('Pearson Correlation Coefficient of ARIMA Forecasts for Different Lead Times') plt.legend() plt.grid(True)

    Save the third plot as pearson.png

    plt.savefig('pearson.png')

    Show the plots

    plt.show() `

    Edited by Asthana, Shivanshi
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment