# -*- coding: utf-8 -*-
"""
Created on Sun Dec 17 17:47:52 2023

@author: qsy

Load a spreadsheet, coerce every column to numeric, report the column pairs
whose absolute correlation reaches a threshold, and draw the correlation
matrix as a heat map.
"""
from itertools import combinations

import pandas as pd

# Raw string: the backslashes are Windows path separators, not escapes.
# ("\D" and "\e" are invalid escape sequences in a normal string literal and
# trigger a warning on recent Python versions; the resulting value is the same.)
filepath = r"D:\Deep Learning in Crowd Farming\emerged.xlsx"


def load_table(path):
    """Read the Excel file at *path* and use its first column as the index."""
    table = pd.read_excel(path)
    return table.set_index(table.columns[0])


def clean_numeric_columns(df):
    """Return a copy of *df* with every column numeric and gaps mean-filled.

    Each column is converted with ``pd.to_numeric(errors="coerce")``, so
    values that cannot be parsed become NaN.  NaN entries are then replaced
    by the mean of their own column.  A column with no parsable value at all
    has a NaN mean and therefore stays NaN.

    The input frame is not modified.
    """
    numeric = df.apply(pd.to_numeric, errors="coerce")
    # After coercion every column is numeric, so a single mean-fill suffices;
    # no separate handling for "non-numeric" columns is needed.
    return numeric.fillna(numeric.mean())


def find_high_correlation_pairs(correlation_matrix, threshold=0.8):
    """List column pairs whose absolute correlation is >= *threshold*.

    Only the upper triangle (i < j) is scanned, so each unordered pair is
    reported once and the diagonal is skipped.  Pairs are returned as
    ``(column_i, column_j)`` tuples in row-major order.  NaN correlations
    never satisfy the comparison and are therefore left out.
    """
    columns = correlation_matrix.columns
    return [
        (columns[i], columns[j])
        for i, j in combinations(range(len(columns)), 2)
        if abs(correlation_matrix.iloc[i, j]) >= threshold
    ]


def plot_correlation_heatmap(correlation_matrix):
    """Draw *correlation_matrix* as a pcolormesh heat map with a colour bar."""
    # Imported here so the data helpers above stay usable without a
    # plotting backend installed.
    import matplotlib.pyplot as plt

    plt.figure()
    plt.pcolormesh(correlation_matrix)
    plt.colorbar()
    plt.show()


if __name__ == "__main__":
    # Data cleaning
    df = clean_numeric_columns(load_table(filepath))

    # Computed correlation matrix
    correlation_matrix = df.corr()

    # Find column pairs with correlations greater than or equal to 0.8
    high_correlation_pairs = find_high_correlation_pairs(correlation_matrix)

    print("Column pairs' correlation greater than or equal to 0.8:")
    for pair in high_correlation_pairs:
        print(pair)

    # Draw a heat map of the correlation matrix
    plot_correlation_heatmap(correlation_matrix)