# Pearson correlation coefficient (PCC) using pandas
import pandas as pd
# Keep only the two columns of interest and discard rows with a missing value.
df = df.loc[:, ['colA', 'colB']].dropna()
# Returns a matrix with each column's correlation to all the others;
# 'pearson' is the method corr() uses when none is given.
df.corr(method='pearson')
# PCC together with its p-value (statistical significance) using SciPy
from scipy.stats import pearsonr
# The result carries the correlation coefficient and its p-value.
pearsonr(x=df['colA'], y=df['colB'])
# PCC, p-value, confidence interval, etc. using pingouin
from pingouin import corr
# 'pearson' is spelled out here, but it is also pingouin's default method.
corr(x=df['colA'], y=df['colB'], method='pearson')
# PCC using researchpy
from researchpy.correlation import corr_case
# Hand corr_case a frame restricted to the two columns being compared.
corr_case(df.loc[:, ['colA', 'colB']])
# PCC using NumPy
import numpy as np
# Convert each of the two columns into its own ndarray.
arrayOne, arrayTwo = (np.array(df[label]) for label in ('colA', 'colB'))
# corrcoef returns the 2x2 correlation matrix; the off-diagonal entries
# hold the PCC between the two arrays.
np.corrcoef(x=arrayOne, y=arrayTwo)
# PCC using PySpark
# NOTE(review): assumes `df` is a Spark DataFrame at this point (`select` and
# `show` are called on it), not the pandas frame used by the other snippets.
# The functions module is imported under an alias rather than importing the
# bare name `corr`, which would silently rebind the `corr` imported from
# pingouin earlier in this file.
from pyspark.sql import functions as F
df.select(F.corr('colA', 'colB')).show()
import seaborn as sns
# Load the iris example dataset (numeric measurements plus a string
# `species` column).
df = sns.load_dataset('iris')
# Calculate the correlation on the numeric columns only: pandas >= 2.0 no
# longer drops non-numeric columns silently and raises on `species`.
# Kind of correlation -> 'pearson', 'kendall', 'spearman'.
corr_matrix = df.select_dtypes(include='number').corr(method='pearson')
# Plot the correlation matrix as a colour-coded table. Pinning the range to
# [-1, 1] keeps 0 at the neutral midpoint of the diverging colormap and makes
# the colours comparable across columns.
corr_matrix.style.background_gradient(cmap='coolwarm', vmin=-1, vmax=1)
# 'RdBu_r', 'BrBG_r', and 'PuOr_r' are other good diverging colormaps
import pandas as pd
# Row-wise correlation: every row of `df` is correlated with the first row
# (by position) of `dfa`.
# NOTE(review): `dfa` is not defined anywhere in the visible source -- confirm
# where it is supposed to come from.
df.corrwith(other=dfa.iloc[0], axis='columns')