import numpy as np
from sklearn.decomposition import PCA
# Three independent scikit-learn PCA usage snippets. Each one rebinds the
# module-level name `pca` to a new estimator, and the `def pca(...)` further
# down this file rebinds it once more to a function.
# NOTE(review): `X`, `X_train`, `X_test` and `features` are not defined in the
# visible part of this file -- confirm they exist before these lines run.

# --- Snippet 1: fit a 3-component PCA and report the variance ratios ---
pca = PCA(n_components = 3)  # number of principal components to keep
pca.fit(X)  # when a train/test split is used, fit on X_train only
print(pca.explained_variance_ratio_)

# --- Snippet 2: reduce a train/test split to 2 components ---
from sklearn.decomposition import PCA
pca = PCA(n_components=2)
# Fit on the training split only, then apply the same fitted projection to the
# test split. Both names are overwritten with their reduced versions.
X_train = pca.fit_transform(X_train)
X_test = pca.transform(X_test)

# --- Snippet 3: fit/transform `features` and compare shapes ---
from sklearn.decomposition import PCA
pca = PCA(n_components=3)
pca.fit(features)
features_pca = pca.transform(features)
print("original shape: ", features.shape)
print("transformed shape:", features_pca.shape)
# Variance captured by each kept component, first as absolute values and then
# as ratios.
print(pca.explained_variance_)
print(pca.explained_variance_ratio_)
def pca(data, n):
    """Project *data* onto its ``n`` leading principal components.

    Args:
        data: 2-D array-like with one observation per row and one feature
            per column.
        n: Number of principal components to keep. Slice semantics apply,
            so a value larger than the number of features keeps them all.

    Returns:
        A real-valued ``numpy.ndarray`` with one row per observation and one
        column per kept component, holding the mean-centered data expressed
        in the principal-component basis. The sign of each column is
        arbitrary, as with any eigenvector.

    Raises:
        ValueError: If *data* is not 2-dimensional.
    """
    data = np.asarray(data)
    if data.ndim != 2:
        raise ValueError(
            "data must be 2-dimensional (observations x features), "
            f"got {data.ndim} dimension(s)"
        )

    # Per-feature mean; the centered data is what gets projected below.
    mean_vector = np.mean(data, axis=0)
    centered = data - mean_vector

    # Covariance between features (columns). np.cov squeezes the
    # single-feature case down to a 0-d array, so restore the matrix shape.
    cov_mat = np.atleast_2d(np.cov(centered, rowvar=False))

    # The covariance matrix is symmetric: eigh is the dedicated solver and
    # always returns real eigenvalues/eigenvectors, whereas eig may produce
    # complex output from round-off.
    eigenvalues, eigenvectors = np.linalg.eigh(cov_mat)

    # Indices of the eigenvalues sorted from largest to smallest; keep n.
    top_n = np.argsort(-eigenvalues)[:n]

    # Columns are the eigenvectors of the n largest eigenvalues.
    components = eigenvectors[:, top_n]

    # Project the centered observations (not the raw ones) so the scores
    # are not shifted by the data mean.
    return np.dot(centered, components)