import matplotlib.pyplot as plt
import yfinance as yf
import pandas as pd
import datetime as dt
import statsmodels.tsa.stattools as ts
from scipy.stats import linregress
def download_data(stock, start, end):
    """Download the adjusted closing prices for a single ticker.

    Args:
        stock: Ticker symbol handed to ``yf.download``.
        start: Start of the download window.
        end: End of the download window.

    Returns:
        A DataFrame with one column, ``'Price'``, holding the
        ``'Adj Close'`` values returned by yfinance.
    """
    # Recent yfinance releases default to auto_adjust=True, which removes the
    # 'Adj Close' column; ask for unadjusted output so the lookup below works.
    ticker = yf.download(stock, start=start, end=end, auto_adjust=False)
    adj_close = ticker['Adj Close']
    # Newer releases return MultiIndex columns even for one ticker, so the
    # selection can be a one-column frame; flatten it to a Series.
    if isinstance(adj_close, pd.DataFrame):
        adj_close = adj_close.iloc[:, 0]
    return pd.DataFrame({'Price': adj_close})
def plot_pairs(data1, data2):
    """Plot two series in separate, vertically stacked panels and show them.

    Args:
        data1: Data drawn in the first (upper) panel.
        data2: Data drawn in the second (lower) panel.
    """
    figure, axes = plt.subplots(2)
    figure.suptitle('Pair Of Stocks')
    # One panel per input, in the order the inputs were given.
    for axis, series in zip(axes, (data1, data2)):
        axis.plot(series)
    plt.show()
def scatter_plot(data1, data2, xlabel='XOM', ylabel='CVX'):
    """Show a scatter plot of one series against the other.

    Args:
        data1: Object whose ``.values`` are plotted on the x axis.
        data2: Object whose ``.values`` are plotted on the y axis.
        xlabel: Label for the x axis. Defaults to ``'XOM'``.
        ylabel: Label for the y axis. Defaults to ``'CVX'``.
    """
    plt.scatter(data1.values, data2.values)
    plt.suptitle('Cointegration')
    # Labels are parameters so the helper can be reused for other pairs;
    # the defaults keep the original XOM/CVX labelling.
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    plt.show()
if __name__ == '__main__':
    # Script entry point: download two price series, plot them, regress one
    # on the other and run an ADF test on the regression residuals.
    start_date = dt.datetime(2011, 4, 1)
    end_date = dt.datetime(2020, 1, 1)

    pair1 = download_data('XOM', start_date, end_date)
    pair2 = download_data('CVX', start_date, end_date)

    plot_pairs(pair1, pair2)
    scatter_plot(pair1, pair2)

    # Keep only dates present in both series and drop missing values, so the
    # regression inputs are guaranteed to have equal length.
    prices = pd.concat(
        [pair1['Price'], pair2['Price']],
        axis=1,
        keys=['x', 'y'],
        join='inner',
    ).dropna()

    # Linear Regression: fits y = slope * x + intercept
    result = linregress(prices['x'].values, prices['y'].values)
    print(result)

    # Create the residual series. The fit above explains y (pair2) by
    # x (pair1), so the residual is y minus the fitted value -- not
    # x - slope * y, which would not match the fitted model.
    residuals = prices['y'] - (result.slope * prices['x'] + result.intercept)

    # adfuller expects a 1-D series; `residuals` is a pandas Series here.
    adf = ts.adfuller(residuals)
    print(adf)