from sklearn.model_selection import cross_val_score
# Score `classifier` with 10-fold cross-validation on the training data;
# `accuracies` holds one score per fold.
accuracies = cross_val_score(classifier, X_train, y_train, cv=10)

# Report the average fold score and its spread, both as percentages.
print("Accuracy: {:.2f} %".format(100 * accuracies.mean()))
print("Standard Deviation: {:.2f} %".format(100 * accuracies.std()))
# K-fold cross-validation for the KNN model, where k is the number of nearest neighbours.
def cross_validation(train_X, train_y, num_folds=4, k=1):
    """Estimate KNN accuracy with randomised k-fold cross-validation.

    Each sample's features and label are joined into a single row, the rows
    are dealt at random into ``num_folds`` folds of equal size, and every
    fold is held out once while ``predict`` is given the remaining folds as
    training data.

    Parameters
    ----------
    train_X : sequence
        Feature rows, one entry per sample.
    train_y : sequence
        Labels, aligned with ``train_X``. Features and label are packed into
        one NumPy row with ``np.append``, so the label is coerced to the
        common dtype of that row.
    num_folds : int, default=4
        Number of folds. Must be at least 2 so that a training set remains
        once one fold is held out.
    k : int, default=1
        Forwarded to ``predict``; the number of nearest neighbours.

    Returns
    -------
    tuple
        ``(mean_accuracy, accuracy_variance)`` over the folds, where the
        variance is the sample variance from ``statistics.variance``.

    Raises
    ------
    ValueError
        If ``num_folds`` is below 2 or there are fewer samples than folds.

    Notes
    -----
    Samples left over when the sample count is not divisible by
    ``num_folds`` are not placed in any fold.
    ``predict`` and ``compute_accuracy`` are helpers defined elsewhere in
    this file; presumably they return the predicted labels and a numeric
    accuracy respectively -- confirm against their definitions.
    NOTE(review): a later ``cross_validation`` definition in this file
    reuses this name and will shadow this function if both are executed in
    the same module.
    """
    if num_folds < 2:
        raise ValueError("num_folds must be at least 2 for cross-validation")

    # Keep each label glued to its features so they are shuffled together.
    rows = [np.append(features, label)
            for features, label in zip(train_X, train_y)]

    fold_size = len(rows) // num_folds
    if fold_size == 0:
        raise ValueError("need at least num_folds samples to build the folds")

    # Deal rows into the folds by drawing without replacement.
    folds = []
    for _ in range(num_folds):
        fold = [rows.pop(randrange(len(rows))) for _ in range(fold_size)]
        folds.append(np.array(fold))

    val_acc = []
    for held_out_index, held_out in enumerate(folds):
        # Train on every fold except the one being validated; training on
        # the held-out fold itself would leak the answers into the model.
        training = np.concatenate(
            [fold for index, fold in enumerate(folds)
             if index != held_out_index]
        )
        predicted = predict(training[:, :-1], training[:, -1],
                            held_out[:, :-1], k)
        # Ground truth must come from the fold currently held out.
        actual = list(held_out[:, -1])
        val_acc.append(compute_accuracy(actual, predicted))

    return sum(val_acc) / len(val_acc), statistics.variance(val_acc)
# If this works, you can buy me a coffee:
# https://www.buymeacoffee.com/eyolve
# K-Fold Cross-Validation
from sklearn.model_selection import cross_validate
def cross_validation(model, _X, _y, _cv=5):
    """Run k-fold cross-validation and summarise the classification metrics.

    Parameters
    ----------
    model : estimator
        The machine learning algorithm handed to ``cross_validate`` as its
        ``estimator``.
    _X : array
        The matrix of features.
    _y : array
        The target variable.
    _cv : int, default=5
        Forwarded to ``cross_validate`` as ``cv``; the number of folds.

    Returns
    -------
    dict
        For both the training and the validation splits: the per-fold
        'accuracy', 'precision', 'recall' and 'f1' scores plus the mean of
        each. Only the two mean accuracies are multiplied by 100; the other
        means are left as returned by the scorer.
    """
    metric_names = ['accuracy', 'precision', 'recall', 'f1']
    cv_output = cross_validate(
        estimator=model,
        X=_X,
        y=_y,
        scoring=metric_names,
        cv=_cv,
        return_train_score=True,
    )

    # Per-fold scores on the data each fold was fitted on.
    train_accuracy = cv_output['train_accuracy']
    train_precision = cv_output['train_precision']
    train_recall = cv_output['train_recall']
    train_f1 = cv_output['train_f1']

    # Per-fold scores on the held-out part of each fold.
    val_accuracy = cv_output['test_accuracy']
    val_precision = cv_output['test_precision']
    val_recall = cv_output['test_recall']
    val_f1 = cv_output['test_f1']

    return {
        "Training Accuracy scores": train_accuracy,
        "Mean Training Accuracy": train_accuracy.mean() * 100,
        "Training Precision scores": train_precision,
        "Mean Training Precision": train_precision.mean(),
        "Training Recall scores": train_recall,
        "Mean Training Recall": train_recall.mean(),
        "Training F1 scores": train_f1,
        "Mean Training F1 Score": train_f1.mean(),
        "Validation Accuracy scores": val_accuracy,
        "Mean Validation Accuracy": val_accuracy.mean() * 100,
        "Validation Precision scores": val_precision,
        "Mean Validation Precision": val_precision.mean(),
        "Validation Recall scores": val_recall,
        "Mean Validation Recall": val_recall.mean(),
        "Validation F1 scores": val_f1,
        "Mean Validation F1 Score": val_f1.mean(),
    }