Skip to content
Snippets Groups Projects
Commit e7bdccfa authored by Asthana, Shivanshi's avatar Asthana, Shivanshi :speech_balloon:
Browse files

READMEForCode.md

parent c9dcc310
Branches
No related tags found
No related merge requests found
import xarray as xr
import numpy as np
import matplotlib.pyplot as plt
from statsmodels.tsa.arima.model import ARIMA
from sklearn.metrics import mean_squared_error
from scipy.stats import pearsonr
def plot_arima_forecast(order, years, time_series, future_years, label, color, original_legend_added=False):
model = ARIMA(time_series, order=order)
model_fit = model.fit()
forecast = model_fit.forecast(steps=len(future_years))
# Add legend for original data only once
if not original_legend_added:
plt.plot(years, time_series, label='Original Data', marker='o', color='green')
original_legend_added = True
plt.axvline(x=2022, color='gray', linestyle='--') # Vertical line at the year 2022
plt.plot(future_years, forecast, label=label, linestyle='dashed', marker='o', color=color)
# Hindcasts for lead times 1, 2, 3, 4, and 5 years
for lead_time in [1, 2, 3, 4, 5]:
hindcast_years = range(1981 + lead_time, 2023)
hindcast = model_fit.predict(start=lead_time, end=len(years) - 1)
plt.plot(hindcast_years, hindcast, linestyle='dashed', marker='o', color=color)
# Calculate RMSE for each hindcast
rmse_values = []
for lead_time in [1, 2, 3, 4, 5]:
hindcast = model_fit.predict(start=lead_time, end=len(years) - 1)
rmse = np.sqrt(mean_squared_error(time_series[lead_time:], hindcast))
rmse_values.append(rmse)
# Calculate Pearson correlation coefficients
pearson_coefficients = []
for lead_time in [1, 2, 3, 4, 5]:
hindcast = model_fit.predict(start=lead_time, end=len(years) - 1)
pearson_coefficient, _ = pearsonr(time_series[lead_time:], hindcast)
pearson_coefficients.append(pearson_coefficient)
return rmse_values, pearson_coefficients, original_legend_added
# Specify the range of years
years = range(1981, 2023)
# Initialize a list to store spatial averages
spatial_averages = []
# Loop through each year
for year in years:
# Load the count file for the current year
count_data = xr.open_dataset(f'/home/u/u301871/counts/count_{year}.nc')
cluster_count = count_data['cluster_count'].values # Adjust variable name
# Calculate the spatial average
spatial_average = np.mean(cluster_count)
# Append the result to the list
spatial_averages.append(spatial_average)
# Close the count dataset to free up resources
count_data.close()
# Create a time series
time_series = np.floor(spatial_averages)
# Plot ARIMA forecasts with different parameters
future_years = range(2023, 2034)
original_legend_added = False
rmse_1, pearson_1, original_legend_added = plot_arima_forecast(order=(20, 1, 3), years=years, time_series=time_series, future_years=future_years, label='ARIMA Forecast (20, 1, 3)', color='blue', original_legend_added=original_legend_added)
rmse_2, pearson_2, original_legend_added = plot_arima_forecast(order=(20, 1, 1), years=years, time_series=time_series, future_years=future_years, label='ARIMA Forecast (20, 1, 1)', color='black', original_legend_added=original_legend_added)
rmse_3, pearson_3, original_legend_added = plot_arima_forecast(order=(20, 1, 5), years=years, time_series=time_series, future_years=future_years, label='ARIMA Forecast (20, 1, 5)', color='red', original_legend_added=original_legend_added)
rmse_4, pearson_4, original_legend_added = plot_arima_forecast(order=(20, 1, 2), years=years, time_series=time_series, future_years=future_years, label='ARIMA Forecast (20, 1, 2)', color='purple', original_legend_added=original_legend_added)
rmse_5, pearson_5, original_legend_added = plot_arima_forecast(order=(20, 1, 4), years=years, time_series=time_series, future_years=future_years, label='ARIMA Forecast (20, 1, 4)', color='yellow', original_legend_added=original_legend_added)
# Add legend to the first plot
plt.legend()
# Save the first plot as timeseries.png
plt.savefig('timeseries.png')
# Plot RMSE values
lead_times = [1, 2, 3, 4, 5]
plt.figure()
for i, rmse_values in enumerate([rmse_1, rmse_2, rmse_3, rmse_4, rmse_5]):
plt.plot(lead_times, rmse_values, label=f'ARIMA Forecast (20, 1, {i + 1})', marker='o')
plt.xlabel('Lead Time (Years)')
plt.ylabel('RMSE')
plt.title('RMSE of ARIMA Forecasts for Different Lead Times')
plt.legend()
plt.grid(True)
# Save the second plot as rmse.png
plt.savefig('rmse.png')
# Plot Pearson correlation coefficients
plt.figure()
for i, pearson_values in enumerate([pearson_1, pearson_2, pearson_3, pearson_4, pearson_5]):
plt.plot(lead_times, pearson_values, label=f'ARIMA Forecast (20, 1, {i + 1})', marker='o')
plt.xlabel('Lead Time (Years)')
plt.ylabel('Pearson Correlation Coefficient')
plt.title('Pearson Correlation Coefficient of ARIMA Forecasts for Different Lead Times')
plt.legend()
plt.grid(True)
# Save the third plot as pearson.png
plt.savefig('pearson.png')
# Show the plots
plt.show()
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment