# Cross-validation for the KNN model, where k is the number of nearest neighbours.
def cross_validation(train_X, train_y, num_folds=4, k=1):
    """Estimate KNN accuracy with k-fold cross-validation.

    The samples are randomly partitioned into ``num_folds`` folds of equal
    size. Each fold is held out once as the validation set while the rows of
    all *other* folds are used as the training set.

    Relies on ``np``, ``randrange``, ``statistics``, ``predict`` and
    ``compute_accuracy`` being available at module level.

    Args:
        train_X: Indexable collection of feature rows. Each row is flattened
            by ``np.append`` when it is joined with its label.
        train_y: Indexable collection of labels, aligned with ``train_X``.
        num_folds: Number of folds; must be at least 2 and no larger than
            the number of samples.
        k: Number of nearest neighbours, forwarded unchanged to ``predict``.

    Returns:
        A ``(mean_accuracy, accuracy_variance)`` tuple computed over the
        per-fold accuracies (sample variance, via ``statistics.variance``).

    Raises:
        ValueError: If ``num_folds`` is smaller than 2 or larger than the
            number of samples.
    """
    num_samples = len(train_X)
    if num_folds < 2:
        raise ValueError(
            "num_folds must be at least 2 so that every fold has training "
            "data and a variance can be computed"
        )
    fold_size = num_samples // num_folds
    if fold_size == 0:
        raise ValueError("num_folds cannot exceed the number of samples")

    # Attach each label to the end of its feature row so that features and
    # labels stay together while rows are shuffled into folds.
    rows = [np.append(train_X[i], train_y[i]) for i in range(num_samples)]

    # Draw rows without replacement. When num_samples is not divisible by
    # num_folds, the leftover rows stay in `remaining` and are not used.
    remaining = list(rows)
    folds = [
        [remaining.pop(randrange(len(remaining))) for _ in range(fold_size)]
        for _ in range(num_folds)
    ]

    val_acc = []
    for held_out_idx, held_out in enumerate(folds):
        # Train on every fold except the one currently being validated.
        training_rows = [
            row
            for fold_idx, fold in enumerate(folds)
            if fold_idx != held_out_idx
            for row in fold
        ]
        train_arr = np.array(training_rows)
        test_arr = np.array(held_out)

        # The last column is the label; everything before it is features.
        predicted = predict(
            train_arr[:, :-1], train_arr[:, -1], test_arr[:, :-1], k
        )
        # Ground truth must come from the held-out fold itself.
        actual = [row[-1] for row in held_out]
        val_acc.append(compute_accuracy(actual, predicted))

    mean_acc = sum(val_acc) / len(val_acc)
    return mean_acc, statistics.variance(val_acc)
#If this works, you can buy me a coffee.
# #https://www.buymeacoffee.com/eyolve